aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorThomas Gleixner2020-08-26 01:36:07 +0200
committerThomas Gleixner2020-08-26 01:36:07 +0200
commitceb2465c51195967f11f6507538579816ac67cb8 (patch)
treeeab534b1062608e36a7b54ccc7be693625f235b8 /arch
parentf107cee94ba4d2c7357fde59a1d84346c73d4958 (diff)
parent821fc9e261f3af235752f46e59084467cfd440c4 (diff)
Merge tag 'irqchip-fixes-5.9-2' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent
Pull irqchip fixes from Marc Zyngier: - Revert the wholesale conversion to platform drivers of the pdc, sysirq and cirq drivers, as it breaks a number of platforms even when the driver is built-in (probe ordering bites you). - Prevent interrupt from being lost with the STM32 exti driver - Fix wake-up interrupts for the MIPS Ingenic driver - Fix an embarassing typo in the new module helpers, leading to the probe failing most of the time - The promised TI firmware rework that couldn't make it into the merge window due to a very badly managed set of dependencies
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/include/asm/core_apecs.h6
-rw-r--r--arch/alpha/include/asm/core_cia.h6
-rw-r--r--arch/alpha/include/asm/core_lca.h6
-rw-r--r--arch/alpha/include/asm/core_marvel.h4
-rw-r--r--arch/alpha/include/asm/core_mcpcia.h6
-rw-r--r--arch/alpha/include/asm/core_t2.h2
-rw-r--r--arch/alpha/include/asm/io.h20
-rw-r--r--arch/alpha/include/asm/io_trivial.h16
-rw-r--r--arch/alpha/include/asm/jensen.h2
-rw-r--r--arch/alpha/include/asm/machvec.h6
-rw-r--r--arch/alpha/include/asm/uaccess.h2
-rw-r--r--arch/alpha/kernel/core_marvel.c2
-rw-r--r--arch/alpha/kernel/io.c12
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/alpha/mm/fault.c8
-rw-r--r--arch/arc/include/asm/segment.h3
-rw-r--r--arch/arc/kernel/process.c2
-rw-r--r--arch/arc/mm/fault.c18
-rw-r--r--arch/arm/configs/am200epdkit_defconfig1
-rw-r--r--arch/arm/include/asm/uaccess.h4
-rw-r--r--arch/arm/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/arm/kernel/signal.c2
-rw-r--r--arch/arm/mm/fault.c25
-rw-r--r--arch/arm/tools/syscall.tbl2
-rw-r--r--arch/arm64/Kconfig20
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-main.dtsi36
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi12
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi8
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-base-board.dts4
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts10
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-main.dtsi43
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi12
-rw-r--r--arch/arm64/include/asm/kvm_asm.h75
-rw-r--r--arch/arm64/include/asm/kvm_coproc.h8
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h75
-rw-r--r--arch/arm64/include/asm/kvm_host.h94
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h15
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h16
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h34
-rw-r--r--arch/arm64/include/asm/mmu.h7
-rw-r--r--arch/arm64/include/asm/uaccess.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h4
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h3
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/arm64/include/asm/virt.h13
-rw-r--r--arch/arm64/kernel/asm-offsets.c3
-rw-r--r--arch/arm64/kernel/cpu_errata.c4
-rw-r--r--arch/arm64/kernel/image-vars.h54
-rw-r--r--arch/arm64/kernel/sdei.c2
-rw-r--r--arch/arm64/kvm/Kconfig2
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/arch_timer.c157
-rw-r--r--arch/arm64/kvm/arm.c57
-rw-r--r--arch/arm64/kvm/fpsimd.c6
-rw-r--r--arch/arm64/kvm/guest.c79
-rw-r--r--arch/arm64/kvm/handle_exit.c32
-rw-r--r--arch/arm64/kvm/hyp/Makefile24
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c8
-rw-r--r--arch/arm64/kvm/hyp/entry.S4
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S1
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S21
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h (renamed from arch/arm64/kvm/hyp/debug-sr.c)88
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h511
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h193
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile62
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c77
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S (renamed from arch/arm64/kvm/hyp-init.S)5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c272
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c46
-rw-r--r--arch/arm64/kvm/hyp/nvhe/timer-sr.c (renamed from arch/arm64/kvm/hyp/timer-sr.c)6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c154
-rw-r--r--arch/arm64/kvm/hyp/smccc_wa.S32
-rw-r--r--arch/arm64/kvm/hyp/switch.c936
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c333
-rw-r--r--arch/arm64/kvm/hyp/tlb.c242
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c4
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c134
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile11
-rw-r--r--arch/arm64/kvm/hyp/vhe/debug-sr.c26
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c219
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c114
-rw-r--r--arch/arm64/kvm/hyp/vhe/timer-sr.c12
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c162
-rw-r--r--arch/arm64/kvm/inject_fault.c2
-rw-r--r--arch/arm64/kvm/mmio.c6
-rw-r--r--arch/arm64/kvm/mmu.c311
-rw-r--r--arch/arm64/kvm/regmap.c37
-rw-r--r--arch/arm64/kvm/reset.c23
-rw-r--r--arch/arm64/kvm/sys_regs.c207
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c96
-rw-r--r--arch/arm64/kvm/trace_arm.h8
-rw-r--r--arch/arm64/kvm/va_layout.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c24
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c2
-rw-r--r--arch/arm64/mm/fault.c29
-rw-r--r--arch/arm64/mm/numa.c10
-rw-r--r--arch/csky/include/asm/segment.h2
-rw-r--r--arch/csky/mm/fault.c13
-rw-r--r--arch/h8300/include/asm/segment.h2
-rw-r--r--arch/hexagon/mm/vm_fault.c9
-rw-r--r--arch/ia64/include/asm/uaccess.h2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/ia64/mm/fault.c9
-rw-r--r--arch/ia64/mm/numa.c2
-rw-r--r--arch/m68k/Kconfig4
-rw-r--r--arch/m68k/Kconfig.machine1
-rw-r--r--arch/m68k/include/asm/segment.h2
-rw-r--r--arch/m68k/include/asm/tlbflush.h6
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/m68k/mm/fault.c14
-rw-r--r--arch/microblaze/include/asm/uaccess.h2
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/microblaze/mm/fault.c9
-rw-r--r--arch/mips/configs/cu1000-neo_defconfig1
-rw-r--r--arch/mips/include/asm/uaccess.h2
-rw-r--r--arch/mips/include/asm/vdso/gettimeofday.h5
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl2
-rw-r--r--arch/mips/kernel/unaligned.c27
-rw-r--r--arch/mips/kvm/emulate.c2
-rw-r--r--arch/mips/kvm/vz.c5
-rw-r--r--arch/mips/mm/fault.c14
-rw-r--r--arch/nds32/include/asm/uaccess.h2
-rw-r--r--arch/nds32/kernel/process.c2
-rw-r--r--arch/nds32/mm/alignment.c7
-rw-r--r--arch/nds32/mm/fault.c19
-rw-r--r--arch/nios2/include/asm/uaccess.h2
-rw-r--r--arch/nios2/mm/fault.c14
-rw-r--r--arch/openrisc/include/asm/io.h9
-rw-r--r--arch/openrisc/include/asm/uaccess.h25
-rw-r--r--arch/openrisc/kernel/setup.c8
-rw-r--r--arch/openrisc/kernel/signal.c14
-rw-r--r--arch/openrisc/kernel/smp.c85
-rw-r--r--arch/openrisc/kernel/stacktrace.c18
-rw-r--r--arch/openrisc/kernel/vmlinux.lds.S12
-rw-r--r--arch/openrisc/mm/fault.c9
-rw-r--r--arch/openrisc/mm/tlb.c17
-rw-r--r--arch/parisc/include/asm/atomic.h8
-rw-r--r--arch/parisc/include/asm/barrier.h61
-rw-r--r--arch/parisc/include/asm/io.h4
-rw-r--r--arch/parisc/include/asm/pgalloc.h6
-rw-r--r--arch/parisc/include/asm/uaccess.h2
-rw-r--r--arch/parisc/kernel/ftrace.c3
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/parisc/lib/iomap.c72
-rw-r--r--arch/parisc/mm/fault.c8
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_uvmem.h14
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/reg.h4
-rw-r--r--arch/powerpc/include/asm/uaccess.h3
-rw-r--r--arch/powerpc/kernel/iomap.c28
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c26
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c30
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c700
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S56
-rw-r--r--arch/powerpc/kvm/book3s_pr.c9
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c2
-rw-r--r--arch/powerpc/kvm/booke.c9
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S9
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S10
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c5
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c12
-rw-r--r--arch/powerpc/mm/copro_fault.c7
-rw-r--r--arch/powerpc/mm/fault.c11
-rw-r--r--arch/riscv/include/asm/uaccess.h6
-rw-r--r--arch/riscv/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/riscv/kernel/head.S25
-rw-r--r--arch/riscv/mm/fault.c16
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/include/asm/atomic.h12
-rw-r--r--arch/s390/include/asm/debug.h17
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/uaccess.h2
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/debug.c32
-rw-r--r--arch/s390/kernel/numa.c (renamed from arch/s390/numa/numa.c)0
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/lib/test_unwind.c1
-rw-r--r--arch/s390/mm/fault.c16
-rw-r--r--arch/s390/mm/gmap.c31
-rw-r--r--arch/s390/numa/Makefile2
-rw-r--r--arch/sh/Kconfig109
-rw-r--r--arch/sh/Makefile5
-rw-r--r--arch/sh/boards/Kconfig6
-rw-r--r--arch/sh/boards/board-sh2007.c4
-rw-r--r--arch/sh/boards/mach-cayman/Makefile5
-rw-r--r--arch/sh/boards/mach-cayman/irq.c148
-rw-r--r--arch/sh/boards/mach-cayman/panic.c46
-rw-r--r--arch/sh/boards/mach-cayman/setup.c181
-rw-r--r--arch/sh/boards/mach-landisk/setup.c3
-rw-r--r--arch/sh/configs/cayman_defconfig66
-rw-r--r--arch/sh/configs/dreamcast_defconfig1
-rw-r--r--arch/sh/configs/espt_defconfig1
-rw-r--r--arch/sh/configs/hp6xx_defconfig1
-rw-r--r--arch/sh/configs/landisk_defconfig1
-rw-r--r--arch/sh/configs/lboxre2_defconfig1
-rw-r--r--arch/sh/configs/microdev_defconfig1
-rw-r--r--arch/sh/configs/migor_defconfig1
-rw-r--r--arch/sh/configs/r7780mp_defconfig1
-rw-r--r--arch/sh/configs/r7785rp_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2d1_defconfig1
-rw-r--r--arch/sh/configs/rts7751r2dplus_defconfig1
-rw-r--r--arch/sh/configs/se7206_defconfig1
-rw-r--r--arch/sh/configs/se7343_defconfig1
-rw-r--r--arch/sh/configs/se7619_defconfig1
-rw-r--r--arch/sh/configs/se7705_defconfig1
-rw-r--r--arch/sh/configs/se7750_defconfig1
-rw-r--r--arch/sh/configs/se7751_defconfig1
-rw-r--r--arch/sh/configs/secureedge5410_defconfig1
-rw-r--r--arch/sh/configs/sh03_defconfig1
-rw-r--r--arch/sh/configs/sh7710voipgw_defconfig1
-rw-r--r--arch/sh/configs/sh7757lcr_defconfig1
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig1
-rw-r--r--arch/sh/configs/shmin_defconfig1
-rw-r--r--arch/sh/configs/titan_defconfig1
-rw-r--r--arch/sh/drivers/pci/Makefile1
-rw-r--r--arch/sh/drivers/pci/common.c6
-rw-r--r--arch/sh/drivers/pci/fixups-cayman.c78
-rw-r--r--arch/sh/drivers/pci/pci-sh7780.c23
-rw-r--r--arch/sh/drivers/pci/pci.c11
-rw-r--r--arch/sh/include/asm/adc.h2
-rw-r--r--arch/sh/include/asm/addrspace.h3
-rw-r--r--arch/sh/include/asm/bitops.h4
-rw-r--r--arch/sh/include/asm/cache.h2
-rw-r--r--arch/sh/include/asm/cacheflush.h3
-rw-r--r--arch/sh/include/asm/dma.h2
-rw-r--r--arch/sh/include/asm/elf.h2
-rw-r--r--arch/sh/include/asm/freq.h2
-rw-r--r--arch/sh/include/asm/futex.h3
-rw-r--r--arch/sh/include/asm/io.h119
-rw-r--r--arch/sh/include/asm/kdebug.h3
-rw-r--r--arch/sh/include/asm/mmu_context.h2
-rw-r--r--arch/sh/include/asm/mmzone.h3
-rw-r--r--arch/sh/include/asm/pci.h4
-rw-r--r--arch/sh/include/asm/processor_32.h2
-rw-r--r--arch/sh/include/asm/segment.h3
-rw-r--r--arch/sh/include/asm/smc37c93x.h4
-rw-r--r--arch/sh/include/asm/sparsemem.h7
-rw-r--r--arch/sh/include/asm/stacktrace.h2
-rw-r--r--arch/sh/include/asm/string_32.h30
-rw-r--r--arch/sh/include/asm/syscall_32.h5
-rw-r--r--arch/sh/include/asm/syscalls_32.h3
-rw-r--r--arch/sh/include/asm/thread_info.h5
-rw-r--r--arch/sh/include/asm/uaccess_32.h53
-rw-r--r--arch/sh/include/asm/watchdog.h2
-rw-r--r--arch/sh/kernel/Makefile2
-rw-r--r--arch/sh/kernel/disassemble.c103
-rw-r--r--arch/sh/kernel/dma-coherent.c51
-rw-r--r--arch/sh/kernel/dumpstack.c30
-rw-r--r--arch/sh/kernel/entry-common.S59
-rw-r--r--arch/sh/kernel/io_trapped.c2
-rw-r--r--arch/sh/kernel/iomap.c22
-rw-r--r--arch/sh/kernel/ioport.c1
-rw-r--r--arch/sh/kernel/machvec.c8
-rw-r--r--arch/sh/kernel/perf_callchain.c6
-rw-r--r--arch/sh/kernel/process_32.c38
-rw-r--r--arch/sh/kernel/ptrace_32.c5
-rw-r--r--arch/sh/kernel/stacktrace.c7
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/sh/kernel/traps_32.c12
-rw-r--r--arch/sh/lib/Makefile2
-rw-r--r--arch/sh/lib/delay.c1
-rw-r--r--arch/sh/mm/Makefile2
-rw-r--r--arch/sh/mm/consistent.c2
-rw-r--r--arch/sh/mm/fault.c14
-rw-r--r--arch/sh/mm/init.c10
-rw-r--r--arch/sh/mm/ioremap.c55
-rw-r--r--arch/sh/mm/ioremap.h23
-rw-r--r--arch/sh/mm/ioremap_fixed.c1
-rw-r--r--arch/sh/mm/pgtable.c7
-rw-r--r--arch/sh/oprofile/backtrace.c7
-rw-r--r--arch/sh/tools/mach-types1
-rw-r--r--arch/sparc/include/asm/sparsemem.h1
-rw-r--r--arch/sparc/include/asm/uaccess_32.h2
-rw-r--r--arch/sparc/include/asm/uaccess_64.h2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/sparc/mm/fault_32.c13
-rw-r--r--arch/sparc/mm/fault_64.c11
-rw-r--r--arch/um/Kconfig4
-rw-r--r--arch/um/kernel/trap.c6
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/entry_32.S109
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/entry/vdso/vdso32/note.S30
-rw-r--r--arch/x86/events/rapl.c46
-rw-r--r--arch/x86/include/asm/mshyperv.h12
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/segment.h2
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/x86/kernel/alternative.c6
-rw-r--r--arch/x86/kernel/cpu/acrn.c12
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c7
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/fpu/xstate.c33
-rw-r--r--arch/x86/kernel/head_32.S31
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/tsc_msr.c9
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/hyperv.c1
-rw-r--r--arch/x86/kvm/x86.c24
-rw-r--r--arch/x86/mm/fault.c17
-rw-r--r--arch/x86/mm/init_64.c9
-rw-r--r--arch/x86/mm/numa.c1
-rw-r--r--arch/x86/purgatory/Makefile5
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/Makefile3
-rw-r--r--arch/x86/xen/apic.c17
-rw-r--r--arch/x86/xen/enlighten_pv.c78
-rw-r--r--arch/x86/xen/mmu_pv.c492
-rw-r--r--arch/x86/xen/p2m.c6
-rw-r--r--arch/x86/xen/setup.c36
-rw-r--r--arch/x86/xen/smp_pv.c18
-rw-r--r--arch/x86/xen/vdso.h6
-rw-r--r--arch/x86/xen/xen-asm.S194
-rw-r--r--arch/x86/xen/xen-asm_32.S185
-rw-r--r--arch/x86/xen/xen-asm_64.S192
-rw-r--r--arch/x86/xen/xen-head.S6
-rw-r--r--arch/x86/xen/xen-ops.h1
-rw-r--r--arch/xtensa/include/asm/uaccess.h2
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/xtensa/mm/fault.c15
335 files changed, 4857 insertions, 5491 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 3329fa143637..af14a567b493 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -972,6 +972,9 @@ config HAVE_SPARSE_SYSCALL_NR
entries at 4000, 5000 and 6000 locations. This option turns on syscall
related optimizations for a given architecture.
+config ARCH_HAS_VDSO_DATA
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h
index 0a07055bc0fe..2d9726fc02ef 100644
--- a/arch/alpha/include/asm/core_apecs.h
+++ b/arch/alpha/include/asm/core_apecs.h
@@ -384,7 +384,7 @@ struct el_apecs_procdata
} \
} while (0)
-__EXTERN_INLINE unsigned int apecs_ioread8(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int apecs_ioread8(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -420,7 +420,7 @@ __EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int apecs_ioread16(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int apecs_ioread16(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -456,7 +456,7 @@ __EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int apecs_ioread32(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int apecs_ioread32(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
if (addr < APECS_DENSE_MEM)
diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h
index c706a7f2b061..cb22991f6761 100644
--- a/arch/alpha/include/asm/core_cia.h
+++ b/arch/alpha/include/asm/core_cia.h
@@ -342,7 +342,7 @@ struct el_CIA_sysdata_mcheck {
#define vuip volatile unsigned int __force *
#define vulp volatile unsigned long __force *
-__EXTERN_INLINE unsigned int cia_ioread8(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int cia_ioread8(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -374,7 +374,7 @@ __EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int cia_ioread16(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int cia_ioread16(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -404,7 +404,7 @@ __EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int cia_ioread32(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int cia_ioread32(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
if (addr < CIA_DENSE_MEM)
diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h
index 84d5e5b84f4f..ec86314418cb 100644
--- a/arch/alpha/include/asm/core_lca.h
+++ b/arch/alpha/include/asm/core_lca.h
@@ -230,7 +230,7 @@ union el_lca {
} while (0)
-__EXTERN_INLINE unsigned int lca_ioread8(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int lca_ioread8(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -266,7 +266,7 @@ __EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int lca_ioread16(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int lca_ioread16(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
unsigned long result, base_and_type;
@@ -302,7 +302,7 @@ __EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + base_and_type) = w;
}
-__EXTERN_INLINE unsigned int lca_ioread32(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int lca_ioread32(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
if (addr < LCA_DENSE_MEM)
diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h
index cc6fd92d5fa9..b266e02e284b 100644
--- a/arch/alpha/include/asm/core_marvel.h
+++ b/arch/alpha/include/asm/core_marvel.h
@@ -332,10 +332,10 @@ struct io7 {
#define vucp volatile unsigned char __force *
#define vusp volatile unsigned short __force *
-extern unsigned int marvel_ioread8(void __iomem *);
+extern unsigned int marvel_ioread8(const void __iomem *);
extern void marvel_iowrite8(u8 b, void __iomem *);
-__EXTERN_INLINE unsigned int marvel_ioread16(void __iomem *addr)
+__EXTERN_INLINE unsigned int marvel_ioread16(const void __iomem *addr)
{
return __kernel_ldwu(*(vusp)addr);
}
diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h
index b30dc128210d..cb24d1bd6141 100644
--- a/arch/alpha/include/asm/core_mcpcia.h
+++ b/arch/alpha/include/asm/core_mcpcia.h
@@ -267,7 +267,7 @@ extern inline int __mcpcia_is_mmio(unsigned long addr)
return (addr & 0x80000000UL) == 0;
}
-__EXTERN_INLINE unsigned int mcpcia_ioread8(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int mcpcia_ioread8(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK;
unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK;
@@ -291,7 +291,7 @@ __EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + hose + 0x00) = w;
}
-__EXTERN_INLINE unsigned int mcpcia_ioread16(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int mcpcia_ioread16(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK;
unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK;
@@ -315,7 +315,7 @@ __EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr)
*(vuip) ((addr << 5) + hose + 0x08) = w;
}
-__EXTERN_INLINE unsigned int mcpcia_ioread32(void __iomem *xaddr)
+__EXTERN_INLINE unsigned int mcpcia_ioread32(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long)xaddr;
diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h
index e0b33d09e93a..12bb7addc789 100644
--- a/arch/alpha/include/asm/core_t2.h
+++ b/arch/alpha/include/asm/core_t2.h
@@ -572,7 +572,7 @@ __EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr)
it doesn't make sense to merge the pio and mmio routines. */
#define IOPORT(OS, NS) \
-__EXTERN_INLINE unsigned int t2_ioread##NS(void __iomem *xaddr) \
+__EXTERN_INLINE unsigned int t2_ioread##NS(const void __iomem *xaddr) \
{ \
if (t2_is_mmio(xaddr)) \
return t2_read##OS(xaddr); \
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index a4d0c19f1e79..1f6a909d1fa5 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -150,9 +150,9 @@ static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \
alpha_mv.mv_##NAME(b, addr); \
}
-REMAP1(unsigned int, ioread8, /**/)
-REMAP1(unsigned int, ioread16, /**/)
-REMAP1(unsigned int, ioread32, /**/)
+REMAP1(unsigned int, ioread8, const)
+REMAP1(unsigned int, ioread16, const)
+REMAP1(unsigned int, ioread32, const)
REMAP1(u8, readb, const volatile)
REMAP1(u16, readw, const volatile)
REMAP1(u32, readl, const volatile)
@@ -307,7 +307,7 @@ static inline int __is_mmio(const volatile void __iomem *addr)
*/
#if IO_CONCAT(__IO_PREFIX,trivial_io_bw)
-extern inline unsigned int ioread8(void __iomem *addr)
+extern inline unsigned int ioread8(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -316,7 +316,7 @@ extern inline unsigned int ioread8(void __iomem *addr)
return ret;
}
-extern inline unsigned int ioread16(void __iomem *addr)
+extern inline unsigned int ioread16(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -359,7 +359,7 @@ extern inline void outw(u16 b, unsigned long port)
#endif
#if IO_CONCAT(__IO_PREFIX,trivial_io_lq)
-extern inline unsigned int ioread32(void __iomem *addr)
+extern inline unsigned int ioread32(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr)
}
#endif
-#define ioread16be(p) be16_to_cpu(ioread16(p))
-#define ioread32be(p) be32_to_cpu(ioread32(p))
-#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p))
-#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p))
+#define ioread16be(p) swab16(ioread16(p))
+#define ioread32be(p) swab32(ioread32(p))
+#define iowrite16be(v,p) iowrite16(swab16(v), (p))
+#define iowrite32be(v,p) iowrite32(swab32(v), (p))
#define inb_p inb
#define inw_p inw
diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h
index ba3d8f0cfe0c..a1a29cbe02fa 100644
--- a/arch/alpha/include/asm/io_trivial.h
+++ b/arch/alpha/include/asm/io_trivial.h
@@ -7,15 +7,15 @@
#if IO_CONCAT(__IO_PREFIX,trivial_io_bw)
__EXTERN_INLINE unsigned int
-IO_CONCAT(__IO_PREFIX,ioread8)(void __iomem *a)
+IO_CONCAT(__IO_PREFIX,ioread8)(const void __iomem *a)
{
- return __kernel_ldbu(*(volatile u8 __force *)a);
+ return __kernel_ldbu(*(const volatile u8 __force *)a);
}
__EXTERN_INLINE unsigned int
-IO_CONCAT(__IO_PREFIX,ioread16)(void __iomem *a)
+IO_CONCAT(__IO_PREFIX,ioread16)(const void __iomem *a)
{
- return __kernel_ldwu(*(volatile u16 __force *)a);
+ return __kernel_ldwu(*(const volatile u16 __force *)a);
}
__EXTERN_INLINE void
@@ -33,9 +33,9 @@ IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a)
#if IO_CONCAT(__IO_PREFIX,trivial_io_lq)
__EXTERN_INLINE unsigned int
-IO_CONCAT(__IO_PREFIX,ioread32)(void __iomem *a)
+IO_CONCAT(__IO_PREFIX,ioread32)(const void __iomem *a)
{
- return *(volatile u32 __force *)a;
+ return *(const volatile u32 __force *)a;
}
__EXTERN_INLINE void
@@ -73,14 +73,14 @@ IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a)
__EXTERN_INLINE u8
IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a)
{
- void __iomem *addr = (void __iomem *)a;
+ const void __iomem *addr = (const void __iomem *)a;
return IO_CONCAT(__IO_PREFIX,ioread8)(addr);
}
__EXTERN_INLINE u16
IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a)
{
- void __iomem *addr = (void __iomem *)a;
+ const void __iomem *addr = (const void __iomem *)a;
return IO_CONCAT(__IO_PREFIX,ioread16)(addr);
}
diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h
index 436dc905b6ad..916895155a88 100644
--- a/arch/alpha/include/asm/jensen.h
+++ b/arch/alpha/include/asm/jensen.h
@@ -305,7 +305,7 @@ __EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr)
that it doesn't make sense to merge them. */
#define IOPORT(OS, NS) \
-__EXTERN_INLINE unsigned int jensen_ioread##NS(void __iomem *xaddr) \
+__EXTERN_INLINE unsigned int jensen_ioread##NS(const void __iomem *xaddr) \
{ \
if (jensen_is_mmio(xaddr)) \
return jensen_read##OS(xaddr - 0x100000000ul); \
diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h
index a6b73c6d10ee..a4e96e2bec74 100644
--- a/arch/alpha/include/asm/machvec.h
+++ b/arch/alpha/include/asm/machvec.h
@@ -46,9 +46,9 @@ struct alpha_machine_vector
void (*mv_pci_tbi)(struct pci_controller *hose,
dma_addr_t start, dma_addr_t end);
- unsigned int (*mv_ioread8)(void __iomem *);
- unsigned int (*mv_ioread16)(void __iomem *);
- unsigned int (*mv_ioread32)(void __iomem *);
+ unsigned int (*mv_ioread8)(const void __iomem *);
+ unsigned int (*mv_ioread16)(const void __iomem *);
+ unsigned int (*mv_ioread32)(const void __iomem *);
void (*mv_iowrite8)(u8, void __iomem *);
void (*mv_iowrite16)(u16, void __iomem *);
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index 1fe2b56cb861..1b6f25efa247 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -20,7 +20,7 @@
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
/*
* Is a address valid? This does a straightforward calculation rather
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 4c80d992a659..4485b77f8658 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -806,7 +806,7 @@ void __iomem *marvel_ioportmap (unsigned long addr)
}
unsigned int
-marvel_ioread8(void __iomem *xaddr)
+marvel_ioread8(const void __iomem *xaddr)
{
unsigned long addr = (unsigned long) xaddr;
if (__marvel_is_port_kbd(addr))
diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c
index 938de13adfbf..838586abb1e0 100644
--- a/arch/alpha/kernel/io.c
+++ b/arch/alpha/kernel/io.c
@@ -14,7 +14,7 @@
"generic", which bumps through the machine vector. */
unsigned int
-ioread8(void __iomem *addr)
+ioread8(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -23,7 +23,7 @@ ioread8(void __iomem *addr)
return ret;
}
-unsigned int ioread16(void __iomem *addr)
+unsigned int ioread16(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -32,7 +32,7 @@ unsigned int ioread16(void __iomem *addr)
return ret;
}
-unsigned int ioread32(void __iomem *addr)
+unsigned int ioread32(const void __iomem *addr)
{
unsigned int ret;
mb();
@@ -257,7 +257,7 @@ EXPORT_SYMBOL(readq_relaxed);
/*
* Read COUNT 8-bit bytes from port PORT into memory starting at SRC.
*/
-void ioread8_rep(void __iomem *port, void *dst, unsigned long count)
+void ioread8_rep(const void __iomem *port, void *dst, unsigned long count)
{
while ((unsigned long)dst & 0x3) {
if (!count)
@@ -300,7 +300,7 @@ EXPORT_SYMBOL(insb);
* the interfaces seems to be slow: just using the inlined version
* of the inw() breaks things.
*/
-void ioread16_rep(void __iomem *port, void *dst, unsigned long count)
+void ioread16_rep(const void __iomem *port, void *dst, unsigned long count)
{
if (unlikely((unsigned long)dst & 0x3)) {
if (!count)
@@ -340,7 +340,7 @@ EXPORT_SYMBOL(insw);
* but the interfaces seems to be slow: just using the inlined version
* of the inl() breaks things.
*/
-void ioread32_rep(void __iomem *port, void *dst, unsigned long count)
+void ioread32_rep(const void __iomem *port, void *dst, unsigned long count)
{
if (unlikely((unsigned long)dst & 0x3)) {
while (count--) {
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index a28fb211881d..ec8bed9e7b75 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -249,7 +249,7 @@
316 common mlockall sys_mlockall
317 common munlockall sys_munlockall
318 common sysinfo sys_sysinfo
-319 common _sysctl sys_sysctl
+319 common _sysctl sys_ni_syscall
# 320 was sys_idle
321 common oldumount sys_oldumount
322 common swapon sys_swapon
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index c2303a8c2b9f..09172f017efc 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
@@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
#endif
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -148,7 +150,7 @@ retry:
/* If for any reason at all we couldn't handle the fault,
make sure we exit gracefully rather than endlessly redo
the fault. */
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -164,10 +166,6 @@ retry:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h
index 6a2a5be5026d..871f8ab11bfd 100644
--- a/arch/arc/include/asm/segment.h
+++ b/arch/arc/include/asm/segment.h
@@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t;
#define KERNEL_DS MAKE_MM_SEG(0)
#define USER_DS MAKE_MM_SEG(TASK_SIZE)
-
-#define segment_eq(a, b) ((a) == (b))
+#define uaccess_kernel() (get_fs() == KERNEL_DS)
#endif /* __ASSEMBLY__ */
#endif /* __ASMARC_SEGMENT_H */
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index e12c80d71b78..efeba1fe7252 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -91,7 +91,7 @@ fault:
goto fail;
mmap_read_lock(current->mm);
- ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
+ ret = fixup_user_fault(current->mm, (unsigned long) uaddr,
FAULT_FLAG_WRITE, NULL);
mmap_read_unlock(current->mm);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 7287c793d1c9..f5657cb68e4f 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
if (write)
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -130,7 +131,7 @@ retry:
goto bad_area;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -155,22 +156,9 @@ bad_area:
* Major/minor page fault accounting
* (in case of retry we only land here once)
*/
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
- if (likely(!(fault & VM_FAULT_ERROR))) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
+ if (likely(!(fault & VM_FAULT_ERROR)))
/* Normal return path: fault Handled Gracefully */
return;
- }
if (!user_mode(regs))
goto no_context;
diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig
index f56ac394caf1..4e49d6cb2f62 100644
--- a/arch/arm/configs/am200epdkit_defconfig
+++ b/arch/arm/configs/am200epdkit_defconfig
@@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum"
CONFIG_SYSVIPC=y
CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index b5fdd30252f8..a13d90206472 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs)
modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
}
-#define segment_eq(a, b) ((a) == (b))
+#define uaccess_kernel() (get_fs() == KERNEL_DS)
/*
* We use 33-bit arithmetic here. Success returns zero, failure returns
@@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long);
*/
#define USER_DS KERNEL_DS
-#define segment_eq(a, b) (1)
+#define uaccess_kernel() (true)
#define __addr_ok(addr) ((void)(addr), 1)
#define __range_ok(addr, size) ((void)(addr), 0)
#define get_fs() (KERNEL_DS)
diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h
index 1b207cf07697..2134cbd5469f 100644
--- a/arch/arm/include/asm/vdso/gettimeofday.h
+++ b/arch/arm/include/asm/vdso/gettimeofday.h
@@ -113,7 +113,8 @@ static inline bool arm_vdso_hres_capable(void)
}
#define __arch_vdso_hres_capable arm_vdso_hres_capable
-static __always_inline u64 __arch_get_hw_counter(int clock_mode)
+static __always_inline u64 __arch_get_hw_counter(int clock_mode,
+ const struct vdso_data *vd)
{
#ifdef CONFIG_ARM_ARCH_TIMER
u64 cycle_now;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index ab2568996ddb..c9dc912b83f0 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -713,7 +713,9 @@ struct page *get_signal_page(void)
/* Defer to generic check */
asmlinkage void addr_limit_check_failed(void)
{
+#ifdef CONFIG_MMU
addr_limit_user_check();
+#endif
}
#ifdef CONFIG_DEBUG_RSEQ
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c6550eddfce1..efa402025031 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
static vm_fault_t __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
- unsigned int flags, struct task_struct *tsk)
+ unsigned int flags, struct task_struct *tsk,
+ struct pt_regs *regs)
{
struct vm_area_struct *vma;
vm_fault_t fault;
@@ -224,7 +225,7 @@ good_area:
goto out;
}
- return handle_mm_fault(vma, addr & PAGE_MASK, flags);
+ return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
check_stack:
/* Don't allow expansion below FIRST_USER_ADDRESS */
@@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -290,7 +293,7 @@ retry:
#endif
}
- fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+ fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
@@ -302,23 +305,7 @@ retry:
return 0;
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 7e8ee4adf269..171077cbf419 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -162,7 +162,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e11b4ea06127..6d232837cbee 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1182,22 +1182,6 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
-config HARDEN_EL2_VECTORS
- bool "Harden EL2 vector mapping against system register leak" if EXPERT
- default y
- help
- Speculation attacks against some high-performance processors can
- be used to leak privileged information such as the vector base
- register, resulting in a potential defeat of the EL2 layout
- randomization.
-
- This config option will map the vectors to a fixed location,
- independent of the EL2 code mapping, so that revealing VBAR_EL2
- to an attacker does not give away any extra information. This
- only gets enabled on affected CPUs.
-
- If unsure, say Y.
-
config ARM64_SSBD
bool "Speculative Store Bypass Disable" if EXPERT
default y
@@ -1520,7 +1504,6 @@ menu "ARMv8.3 architectural features"
config ARM64_PTR_AUTH
bool "Enable support for pointer authentication"
default y
- depends on !KVM || ARM64_VHE
depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
# Modern compilers insert a .note.gnu.property section note for PAC
# which is only understood by binutils starting with version 2.33.1.
@@ -1547,8 +1530,7 @@ config ARM64_PTR_AUTH
The feature is detected at runtime. If the feature is not present in
hardware it will not be advertised to userspace/KVM guest nor will it
- be enabled. However, KVM guest also require VHE mode and hence
- CONFIG_ARM64_VHE=y option to use this feature.
+ be enabled.
If the feature is present on the boot CPU but not on a late CPU, then
the late CPU will be parked. Also, if the boot CPU does not have
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index 9edfae5944f7..24ef18fe77df 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -417,10 +417,10 @@
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <56>;
- ti,sci-rm-range-girq = <0x1>;
+ ti,sci-dev-id = <100>;
+ ti,interrupt-ranges = <0 392 32>;
};
main_navss {
@@ -438,10 +438,11 @@
ti,intr-trigger-type = <4>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <56>;
- ti,sci-rm-range-girq = <0x0>, <0x2>;
+ ti,sci-dev-id = <182>;
+ ti,interrupt-ranges = <0 64 64>,
+ <64 448 64>;
};
inta_main_udmass: interrupt-controller@33d00000 {
@@ -452,8 +453,7 @@
msi-controller;
ti,sci = <&dmsc>;
ti,sci-dev-id = <179>;
- ti,sci-rm-range-vint = <0x0>;
- ti,sci-rm-range-global-event = <0x1>;
+ ti,interrupt-ranges = <0 0 256>;
};
secure_proxy_main: mailbox@32c00000 {
@@ -589,7 +589,7 @@
<0x0 0x33000000 0x0 0x40000>;
reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target";
ti,num-rings = <818>;
- ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */
+ ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */
ti,dma-ring-reset-quirk;
ti,sci = <&dmsc>;
ti,sci-dev-id = <187>;
@@ -609,11 +609,11 @@
ti,sci-dev-id = <188>;
ti,ringacc = <&ringacc>;
- ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */
- <0x2>; /* TX_CHAN */
- ti,sci-rm-range-rchan = <0x4>, /* RX_HCHAN */
- <0x5>; /* RX_CHAN */
- ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */
+ ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */
+ <0xd>; /* TX_CHAN */
+ ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */
+ <0xa>; /* RX_CHAN */
+ ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */
};
cpts@310d0000 {
@@ -622,7 +622,7 @@
reg-names = "cpts";
clocks = <&main_cpts_mux>;
clock-names = "cpts";
- interrupts-extended = <&intr_main_navss 163 0>;
+ interrupts-extended = <&intr_main_navss 391>;
interrupt-names = "cpts";
ti,cpts-periodic-outputs = <6>;
ti,cpts-ext-ts-inputs = <8>;
@@ -645,8 +645,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&intr_main_gpio>;
- interrupts = <57 256>, <57 257>, <57 258>, <57 259>, <57 260>,
- <57 261>;
+ interrupts = <192>, <193>, <194>, <195>, <196>, <197>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <96>;
@@ -661,8 +660,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&intr_main_gpio>;
- interrupts = <58 256>, <58 257>, <58 258>, <58 259>, <58 260>,
- <58 261>;
+ interrupts = <200>, <201>, <202>, <203>, <204>, <205>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <90>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 8c1abcfe0860..51ca4b4d4c21 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -134,7 +134,7 @@
<0x0 0x2a500000 0x0 0x40000>;
reg-names = "rt", "fifos", "proxy_gcfg", "proxy_target";
ti,num-rings = <286>;
- ti,sci-rm-range-gp-rings = <0x2>; /* GP ring range */
+ ti,sci-rm-range-gp-rings = <0x1>; /* GP ring range */
ti,dma-ring-reset-quirk;
ti,sci = <&dmsc>;
ti,sci-dev-id = <195>;
@@ -154,11 +154,11 @@
ti,sci-dev-id = <194>;
ti,ringacc = <&mcu_ringacc>;
- ti,sci-rm-range-tchan = <0x1>, /* TX_HCHAN */
- <0x2>; /* TX_CHAN */
- ti,sci-rm-range-rchan = <0x3>, /* RX_HCHAN */
- <0x4>; /* RX_CHAN */
- ti,sci-rm-range-rflow = <0x5>; /* GP RFLOW */
+ ti,sci-rm-range-tchan = <0xf>, /* TX_HCHAN */
+ <0xd>; /* TX_CHAN */
+ ti,sci-rm-range-rchan = <0xb>, /* RX_HCHAN */
+ <0xa>; /* RX_CHAN */
+ ti,sci-rm-range-rflow = <0x0>; /* GP RFLOW */
};
};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index 5f55b9e82cf1..a1ffe88d9664 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -74,10 +74,10 @@
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <56>;
- ti,sci-rm-range-girq = <0x4>;
+ ti,sci-dev-id = <156>;
+ ti,interrupt-ranges = <0 712 16>;
};
wkup_gpio0: wkup_gpio0@42110000 {
@@ -86,7 +86,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&intr_wkup_gpio>;
- interrupts = <59 128>, <59 129>, <59 130>, <59 131>;
+ interrupts = <60>, <61>, <62>, <63>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <56>;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 611e66207010..b8a8a0fcb8af 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -384,7 +384,7 @@
};
&mailbox0_cluster0 {
- interrupts = <164 0>;
+ interrupts = <436>;
mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 {
ti,mbox-tx = <1 0 0>;
@@ -393,7 +393,7 @@
};
&mailbox0_cluster1 {
- interrupts = <165 0>;
+ interrupts = <432>;
mbox_mcu_r5fss0_core1: mbox-mcu-r5fss0-core1 {
ti,mbox-tx = <1 0 0>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
index 8bc1e6ecc50e..e8fc01d97ada 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts
@@ -287,7 +287,7 @@
};
&mailbox0_cluster0 {
- interrupts = <214 0>;
+ interrupts = <436>;
mbox_mcu_r5fss0_core0: mbox-mcu-r5fss0-core0 {
ti,mbox-rx = <0 0 0>;
@@ -301,7 +301,7 @@
};
&mailbox0_cluster1 {
- interrupts = <215 0>;
+ interrupts = <432>;
mbox_main_r5fss0_core0: mbox-main-r5fss0-core0 {
ti,mbox-rx = <0 0 0>;
@@ -315,7 +315,7 @@
};
&mailbox0_cluster2 {
- interrupts = <216 0>;
+ interrupts = <428>;
mbox_main_r5fss1_core0: mbox-main-r5fss1-core0 {
ti,mbox-rx = <0 0 0>;
@@ -329,7 +329,7 @@
};
&mailbox0_cluster3 {
- interrupts = <217 0>;
+ interrupts = <424>;
mbox_c66_0: mbox-c66-0 {
ti,mbox-rx = <0 0 0>;
@@ -343,7 +343,7 @@
};
&mailbox0_cluster4 {
- interrupts = <218 0>;
+ interrupts = <420>;
mbox_c71_0: mbox-c71-0 {
ti,mbox-rx = <0 0 0>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index d14060207f00..12ceea9b3c9a 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -80,10 +80,10 @@
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <14>;
- ti,sci-rm-range-girq = <0x1>;
+ ti,sci-dev-id = <131>;
+ ti,interrupt-ranges = <8 392 56>;
};
main_navss {
@@ -101,10 +101,12 @@
ti,intr-trigger-type = <4>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <14>;
- ti,sci-rm-range-girq = <0>, <2>;
+ ti,sci-dev-id = <213>;
+ ti,interrupt-ranges = <0 64 64>,
+ <64 448 64>,
+ <128 672 64>;
};
main_udmass_inta: interrupt-controller@33d00000 {
@@ -115,8 +117,7 @@
msi-controller;
ti,sci = <&dmsc>;
ti,sci-dev-id = <209>;
- ti,sci-rm-range-vint = <0xa>;
- ti,sci-rm-range-global-event = <0xd>;
+ ti,interrupt-ranges = <0 0 256>;
};
secure_proxy_main: mailbox@32c00000 {
@@ -296,7 +297,7 @@
reg-names = "cpts";
clocks = <&k3_clks 201 1>;
clock-names = "cpts";
- interrupts-extended = <&main_navss_intr 201 0>;
+ interrupts-extended = <&main_navss_intr 391>;
interrupt-names = "cpts";
ti,cpts-periodic-outputs = <6>;
ti,cpts-ext-ts-inputs = <8>;
@@ -688,8 +689,8 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <105 0>, <105 1>, <105 2>, <105 3>,
- <105 4>, <105 5>, <105 6>, <105 7>;
+ interrupts = <256>, <257>, <258>, <259>,
+ <260>, <261>, <262>, <263>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <128>;
@@ -705,7 +706,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <106 0>, <106 1>, <106 2>;
+ interrupts = <288>, <289>, <290>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <36>;
@@ -721,8 +722,8 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <107 0>, <107 1>, <107 2>, <107 3>,
- <107 4>, <107 5>, <107 6>, <107 7>;
+ interrupts = <264>, <265>, <266>, <267>,
+ <268>, <269>, <270>, <271>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <128>;
@@ -738,7 +739,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <108 0>, <108 1>, <108 2>;
+ interrupts = <292>, <293>, <294>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <36>;
@@ -754,8 +755,8 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <109 0>, <109 1>, <109 2>, <109 3>,
- <109 4>, <109 5>, <109 6>, <109 7>;
+ interrupts = <272>, <273>, <274>, <275>,
+ <276>, <277>, <278>, <279>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <128>;
@@ -771,7 +772,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <110 0>, <110 1>, <110 2>;
+ interrupts = <296>, <297>, <298>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <36>;
@@ -787,8 +788,8 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <111 0>, <111 1>, <111 2>, <111 3>,
- <111 4>, <111 5>, <111 6>, <111 7>;
+ interrupts = <280>, <281>, <282>, <283>,
+ <284>, <285>, <286>, <287>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <128>;
@@ -804,7 +805,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&main_gpio_intr>;
- interrupts = <112 0>, <112 1>, <112 2>;
+ interrupts = <300>, <301>, <302>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <36>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index 30a735bcd0c8..c4a48e8d420a 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -101,10 +101,10 @@
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
- #interrupt-cells = <2>;
+ #interrupt-cells = <1>;
ti,sci = <&dmsc>;
- ti,sci-dst-id = <14>;
- ti,sci-rm-range-girq = <0x5>;
+ ti,sci-dev-id = <137>;
+ ti,interrupt-ranges = <16 960 16>;
};
wkup_gpio0: gpio@42110000 {
@@ -113,8 +113,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&wkup_gpio_intr>;
- interrupts = <113 0>, <113 1>, <113 2>,
- <113 3>, <113 4>, <113 5>;
+ interrupts = <103>, <104>, <105>, <106>, <107>, <108>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <84>;
@@ -130,8 +129,7 @@
gpio-controller;
#gpio-cells = <2>;
interrupt-parent = <&wkup_gpio_intr>;
- interrupts = <114 0>, <114 1>, <114 2>,
- <114 3>, <114 4>, <114 5>;
+ interrupts = <112>, <113>, <114>, <115>, <116>, <117>;
interrupt-controller;
#interrupt-cells = <2>;
ti,ngpio = <84>;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 352aaebf4198..fb1a922b31ba 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,33 +42,81 @@
#include <linux/mm.h>
-/* Translate a kernel address of @sym into its equivalent linear mapping */
-#define kvm_ksym_ref(sym) \
+/*
+ * Translate name of a symbol defined in nVHE hyp to the name seen
+ * by kernel proper. All nVHE symbols are prefixed by the build system
+ * to avoid clashes with the VHE variants.
+ */
+#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
+
+#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[]
+
+/*
+ * Define a pair of symbols sharing the same name but one defined in
+ * VHE and the other in nVHE hyp implementations.
+ */
+#define DECLARE_KVM_HYP_SYM(sym) \
+ DECLARE_KVM_VHE_SYM(sym); \
+ DECLARE_KVM_NVHE_SYM(sym)
+
+#define CHOOSE_VHE_SYM(sym) sym
+#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym)
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * BIG FAT WARNINGS:
+ *
+ * - Don't be tempted to change the following is_kernel_in_hyp_mode()
+ * to has_vhe(). has_vhe() is implemented as a *final* capability,
+ * while this is used early at boot time, when the capabilities are
+ * not final yet....
+ *
+ * - Don't let the nVHE hypervisor have access to this, as it will
+ * pick the *wrong* symbol (yes, it runs at EL2...).
+ */
+#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \
+ : CHOOSE_NVHE_SYM(sym))
+#else
+/* The nVHE hypervisor shouldn't even try to access anything */
+extern void *__nvhe_undefined_symbol;
+#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol
+#endif
+
+/* Translate a kernel address @ptr into its equivalent linear mapping */
+#define kvm_ksym_ref(ptr) \
({ \
- void *val = &sym; \
+ void *val = (ptr); \
if (!is_kernel_in_hyp_mode()) \
- val = lm_alias(&sym); \
+ val = lm_alias((ptr)); \
val; \
})
+#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym))
struct kvm;
struct kvm_vcpu;
+struct kvm_s2_mmu;
-extern char __kvm_hyp_init[];
-extern char __kvm_hyp_init_end[];
+DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
+DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
+#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
+#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
-extern char __kvm_hyp_vector[];
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+extern atomic_t arm64_el2_vector_last_slot;
+DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
+#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
+#endif
extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
-extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
-
-extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_enable_ssbs(void);
@@ -143,7 +191,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
.macro get_vcpu_ptr vcpu, ctxt
get_host_ctxt \ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
- kern_hyp_va \vcpu
.endm
#endif
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 454373704b8a..d6bb40122fdb 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -19,14 +19,6 @@ struct kvm_sys_reg_table {
size_t num;
};
-struct kvm_sys_reg_target_table {
- struct kvm_sys_reg_table table64;
- struct kvm_sys_reg_table table32;
-};
-
-void kvm_register_target_sys_reg_table(unsigned int target,
- struct kvm_sys_reg_target_table *table);
-
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4d0f8ea600ba..49a55be2b9a2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -124,33 +124,12 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
-}
-
-static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
-{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
-}
-
-static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(SYS_ELR);
- else
- return *__vcpu_elr_el1(vcpu);
-}
-
-static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
-{
- if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, SYS_ELR);
- else
- *__vcpu_elr_el1(vcpu) = v;
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pc;
}
static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate;
}
static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
@@ -179,14 +158,14 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
u8 reg_num)
{
- return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+ return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num];
}
static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
unsigned long val)
{
if (reg_num != 31)
- vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
+ vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
@@ -197,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
if (vcpu->arch.sysregs_loaded_on_cpu)
return read_sysreg_el1(SYS_SPSR);
else
- return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+ return __vcpu_sys_reg(vcpu, SPSR_EL1);
}
static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
@@ -210,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
if (vcpu->arch.sysregs_loaded_on_cpu)
write_sysreg_el1(v, SYS_SPSR);
else
- vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
}
/*
@@ -259,14 +238,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
if (esr & ESR_ELx_CV)
return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -291,64 +270,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK;
}
static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV);
}
static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+ return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
}
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE);
}
static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF);
}
static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
- return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
+ return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW);
}
static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) ||
kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
}
static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM);
}
static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
- return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
+ return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
/* This one is not specific to Data Abort */
static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL);
}
static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
{
- return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
+ return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
}
static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
@@ -358,15 +337,15 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC;
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
}
-static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case FSC_SEA:
@@ -387,7 +366,7 @@ static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
}
@@ -516,14 +495,14 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i
* Skip an instruction which has been emulated at hyp while most guest sysregs
* are live.
*/
-static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR);
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR);
+ write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR);
write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f81151ad3d3c..65568b23868a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -66,19 +66,34 @@ struct kvm_vmid {
u32 vmid;
};
-struct kvm_arch {
+struct kvm_s2_mmu {
struct kvm_vmid vmid;
- /* stage2 entry level table */
- pgd_t *pgd;
- phys_addr_t pgd_phys;
-
- /* VTCR_EL2 value for this VM */
- u64 vtcr;
+ /*
+ * stage2 entry level table
+ *
+ * Two kvm_s2_mmu structures in the same VM can point to the same
+ * pgd here. This happens when running a guest using a
+ * translation regime that isn't affected by its own stage-2
+ * translation, such as a non-VHE hypervisor running at vEL2, or
+ * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the
+ * canonical stage-2 page tables.
+ */
+ pgd_t *pgd;
+ phys_addr_t pgd_phys;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
+ struct kvm *kvm;
+};
+
+struct kvm_arch {
+ struct kvm_s2_mmu mmu;
+
+ /* VTCR_EL2 value for this VM */
+ u64 vtcr;
+
/* The maximum number of vCPUs depends on the used GIC model */
int max_vcpus;
@@ -159,6 +174,16 @@ enum vcpu_sysreg {
APGAKEYLO_EL1,
APGAKEYHI_EL1,
+ ELR_EL1,
+ SP_EL1,
+ SPSR_EL1,
+
+ CNTVOFF_EL2,
+ CNTV_CVAL_EL0,
+ CNTV_CTL_EL0,
+ CNTP_CVAL_EL0,
+ CNTP_CTL_EL0,
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -210,7 +235,15 @@ enum vcpu_sysreg {
#define NR_COPRO_REGS (NR_SYS_REGS * 2)
struct kvm_cpu_context {
- struct kvm_regs gp_regs;
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ u64 spsr_abt;
+ u64 spsr_und;
+ u64 spsr_irq;
+ u64 spsr_fiq;
+
+ struct user_fpsimd_state fp_regs;
+
union {
u64 sys_regs[NR_SYS_REGS];
u32 copro[NR_COPRO_REGS];
@@ -243,6 +276,9 @@ struct kvm_vcpu_arch {
void *sve_state;
unsigned int sve_max_vl;
+ /* Stage 2 paging state used by the hardware on next switch */
+ struct kvm_s2_mmu *hw_mmu;
+
/* HYP configuration */
u64 hcr_el2;
u32 mdcr_el2;
@@ -327,7 +363,7 @@ struct kvm_vcpu_arch {
struct vcpu_reset_state reset_state;
/* True when deferrable sysregs are loaded on the physical CPU,
- * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+ * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
bool sysregs_loaded_on_cpu;
/* Guest PV state */
@@ -378,15 +414,20 @@ struct kvm_vcpu_arch {
#define vcpu_has_ptrauth(vcpu) false
#endif
-#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
+#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
/*
- * Only use __vcpu_sys_reg if you know you want the memory backed version of a
- * register, and not the one most recently accessed by a running VCPU. For
- * example, for userspace access or for system registers that are never context
- * switched, but only emulated.
+ * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the
+ * memory backed version of a register, and not the one most recently
+ * accessed by a running VCPU. For example, for userspace access or
+ * for system registers that are never context switched, but only
+ * emulated.
*/
-#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)])
+
+#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
+
+#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -442,6 +483,18 @@ void kvm_arm_resume_guest(struct kvm *kvm);
u64 __kvm_call_hyp(void *hypfn, ...);
+#define kvm_call_hyp_nvhe(f, ...) \
+ do { \
+ DECLARE_KVM_NVHE_SYM(f); \
+ __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \
+ } while(0)
+
+#define kvm_call_hyp_nvhe_ret(f, ...) \
+ ({ \
+ DECLARE_KVM_NVHE_SYM(f); \
+ __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \
+ })
+
/*
* The couple of isb() below are there to guarantee the same behaviour
* on VHE as on !VHE, where the eret to EL1 acts as a context
@@ -453,7 +506,7 @@ u64 __kvm_call_hyp(void *hypfn, ...);
f(__VA_ARGS__); \
isb(); \
} else { \
- __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+ kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
} while(0)
@@ -465,8 +518,7 @@ u64 __kvm_call_hyp(void *hypfn, ...);
ret = f(__VA_ARGS__); \
isb(); \
} else { \
- ret = __kvm_call_hyp(kvm_ksym_ref(f), \
- ##__VA_ARGS__); \
+ ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \
} \
\
ret; \
@@ -518,7 +570,7 @@ DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
- cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
+ ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
}
static inline bool kvm_arch_requires_vhe(void)
@@ -619,8 +671,8 @@ static inline int kvm_arm_have_ssbd(void)
}
}
-void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
-void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
int kvm_set_ipa_limit(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index ce3080834bfa..46689e7db46c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -12,8 +12,6 @@
#include <asm/alternative.h>
#include <asm/sysreg.h>
-#define __hyp_text __section(.hyp.text) notrace __noscs
-
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
@@ -63,17 +61,20 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
+#ifdef __KVM_NVHE_HYPERVISOR__
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
+#endif
+#ifdef __KVM_NVHE_HYPERVISOR__
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+#else
void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
-void __sysreg32_save_state(struct kvm_vcpu *vcpu);
-void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+#endif
void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
void __debug_switch_to_host(struct kvm_vcpu *vcpu);
@@ -81,11 +82,17 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+#ifndef __KVM_NVHE_HYPERVISOR__
void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
void deactivate_traps_vhe_put(void);
+#endif
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt);
+#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(unsigned long, ...);
+#endif
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 40be8f6c7351..189839c3706a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -134,8 +134,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
-int kvm_alloc_stage2_pgd(struct kvm *kvm);
-void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
@@ -577,13 +577,13 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
}
-static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
+static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
- struct kvm_vmid *vmid = &kvm->arch.vmid;
+ struct kvm_vmid *vmid = &mmu->vmid;
u64 vmid_field, baddr;
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
- baddr = kvm->arch.pgd_phys;
+ baddr = mmu->pgd_phys;
vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
@@ -592,10 +592,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
-static __always_inline void __load_guest_stage2(struct kvm *kvm)
+static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
{
- write_sysreg(kvm->arch.vtcr, vtcr_el2);
- write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
+ write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2);
+ write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 6301813dcace..0ddf98c3ba9f 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -61,44 +61,36 @@
/*
* Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
- * check for the presence of one of the cpufeature flag
- * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and
+ * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as
+ * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and
* then proceed ahead with the save/restore of Pointer Authentication
- * key registers.
+ * key registers if enabled for the guest.
*/
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
-alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
- b 1000f
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
alternative_else_nop_endif
-alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
- b 1001f
-alternative_else_nop_endif
-1000:
- ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ mrs \reg1, hcr_el2
and \reg1, \reg1, #(HCR_API | HCR_APK)
- cbz \reg1, 1001f
+ cbz \reg1, .L__skip_switch\@
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_restore_state \reg1, \reg2, \reg3
-1001:
+.L__skip_switch\@:
.endm
.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
-alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
- b 2000f
-alternative_else_nop_endif
-alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
- b 2001f
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
alternative_else_nop_endif
-2000:
- ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ mrs \reg1, hcr_el2
and \reg1, \reg1, #(HCR_API | HCR_APK)
- cbz \reg1, 2001f
+ cbz \reg1, .L__skip_switch\@
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_save_state \reg1, \reg2, \reg3
add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_restore_state \reg1, \reg2, \reg3
isb
-2001:
+.L__skip_switch\@:
.endm
#else /* !CONFIG_ARM64_PTR_AUTH */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 8444df000181..a7a5ecaa2e83 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -45,13 +45,6 @@ struct bp_hardening_data {
bp_hardening_cb_t fn;
};
-#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
- defined(CONFIG_HARDEN_EL2_VECTORS))
-
-extern char __bp_harden_hyp_vecs[];
-extern atomic_t arm64_el2_vector_last_slot;
-#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
-
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 8d7c466f809b..991dd5f031e4 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs)
CONFIG_ARM64_UAO));
}
-#define segment_eq(a, b) ((a) == (b))
+#define uaccess_kernel() (get_fs() == KERNEL_DS)
/*
* Test whether a block of memory is a valid user space address.
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 17e81bd9a2d3..734860ac7cf9 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -308,8 +308,8 @@ __SYSCALL(__NR_writev, compat_sys_writev)
__SYSCALL(__NR_getsid, sys_getsid)
#define __NR_fdatasync 148
__SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR__sysctl 149
-__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+ /* 149 was sys_sysctl */
+__SYSCALL(149, sys_ni_syscall)
#define __NR_mlock 150
__SYSCALL(__NR_mlock, sys_mlock)
#define __NR_munlock 151
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 75cbae60455b..7508b0ac1d21 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -103,7 +103,8 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
u64 res;
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 9c29ad3049f8..631ab1281633 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -64,7 +64,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
u64 res;
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 5051b388c654..09977acc007d 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -85,10 +85,17 @@ static inline bool is_kernel_in_hyp_mode(void)
static __always_inline bool has_vhe(void)
{
- if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ /*
+ * The following macros are defined for code specic to VHE/nVHE.
+ * If has_vhe() is inlined into those compilation units, it can
+ * be determined statically. Otherwise fall back to caps.
+ */
+ if (__is_defined(__KVM_VHE_HYPERVISOR__))
return true;
-
- return false;
+ else if (__is_defined(__KVM_NVHE_HYPERVISOR__))
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0577e2142284..7d32fc959b1a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -102,13 +102,12 @@ int main(void)
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
- DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
- DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
#endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 79728bfb5351..6bd1d3ad037a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -632,7 +632,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
return is_midr_in_range(midr, &range) && has_dic;
}
-#if defined(CONFIG_HARDEN_EL2_VECTORS)
+#ifdef CONFIG_RANDOMIZE_BASE
static const struct midr_range ca57_a72[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
@@ -891,7 +891,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = check_branch_predictor,
},
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
{
.desc = "EL2 vector hardening",
.capability = ARM64_HARDEN_EL2_VECTORS,
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index be0a63ffed23..9e897c500237 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -51,4 +51,58 @@ __efistub__ctype = _ctype;
#endif
+#ifdef CONFIG_KVM
+
+/*
+ * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to
+ * separate it from the kernel proper. The following symbols are legally
+ * accessed by it, therefore provide aliases to make them linkable.
+ * Do not include symbols which may not be safely accessed under hypervisor
+ * memory mappings.
+ */
+
+#define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym;
+
+/* Alternative callbacks for init-time patching of nVHE hyp code. */
+KVM_NVHE_ALIAS(arm64_enable_wa2_handling);
+KVM_NVHE_ALIAS(kvm_patch_vector_branch);
+KVM_NVHE_ALIAS(kvm_update_va_mask);
+
+/* Global kernel state accessed by nVHE hyp code. */
+KVM_NVHE_ALIAS(arm64_ssbd_callback_required);
+KVM_NVHE_ALIAS(kvm_host_data);
+KVM_NVHE_ALIAS(kvm_vgic_global_state);
+
+/* Kernel constant needed to compute idmap addresses. */
+KVM_NVHE_ALIAS(kimage_voffset);
+
+/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
+KVM_NVHE_ALIAS(__hyp_panic_string);
+KVM_NVHE_ALIAS(panic);
+
+/* Vectors installed by hyp-init on reset HVC. */
+KVM_NVHE_ALIAS(__hyp_stub_vectors);
+
+/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
+KVM_NVHE_ALIAS(idmap_t0sz);
+
+/* Kernel symbol used by icache_is_vpipt(). */
+KVM_NVHE_ALIAS(__icache_flags);
+
+/* Kernel symbols needed for cpus_have_final/const_caps checks. */
+KVM_NVHE_ALIAS(arm64_const_caps_ready);
+KVM_NVHE_ALIAS(cpu_hwcap_keys);
+KVM_NVHE_ALIAS(cpu_hwcaps);
+
+/* Static keys which are set if a vGIC trap should be handled in hyp. */
+KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
+KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+
+/* Static key checked in pmr_sync(). */
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+KVM_NVHE_ALIAS(gic_pmr_sync);
+#endif
+
+#endif /* CONFIG_KVM */
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index dab88260b137..7689f2031c0c 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
/*
* We didn't take an exception to get here, set PAN. UAO will be cleared
- * by sdei_event_handler()s set_fs(USER_DS) call.
+ * by sdei_event_handler()s force_uaccess_begin() call.
*/
__uaccess_enable_hw_pan();
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 13489aff4440..318c8f2df245 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -58,7 +58,7 @@ config KVM_ARM_PMU
virtual machines.
config KVM_INDIRECT_VECTORS
- def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS
+ def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE
endif # KVM
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 8d3d9513cbfe..99977c1972cc 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,8 +13,8 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
- inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \
- guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \
+ inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
+ guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
aarch32.o arch_timer.o \
vgic/vgic.o vgic/vgic-init.o \
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index a1fe0ea3254e..32ba6fbc3814 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -51,6 +51,93 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg);
+u32 timer_get_ctl(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
+ case TIMER_PTIMER:
+ return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+u64 timer_get_cval(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ case TIMER_PTIMER:
+ return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static u64 timer_get_offset(struct arch_timer_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ default:
+ return 0;
+ }
+}
+
+static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
+ break;
+ case TIMER_PTIMER:
+ __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
+ break;
+ case TIMER_PTIMER:
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+
+ switch(arch_timer_ctx_index(ctxt)) {
+ case TIMER_VTIMER:
+ __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset;
+ break;
+ default:
+ WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
+ }
+}
+
u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
@@ -124,8 +211,8 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
- cval = timer_ctx->cnt_cval;
- now = kvm_phys_timer_read() - timer_ctx->cntvoff;
+ cval = timer_get_cval(timer_ctx);
+ now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
if (now < cval) {
u64 ns;
@@ -144,8 +231,8 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
return timer_ctx &&
- !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
- (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
+ ((timer_get_ctl(timer_ctx) &
+ (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}
/*
@@ -256,8 +343,8 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
- cval = timer_ctx->cnt_cval;
- now = kvm_phys_timer_read() - timer_ctx->cntvoff;
+ cval = timer_get_cval(timer_ctx);
+ now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
return cval <= now;
}
@@ -350,8 +437,8 @@ static void timer_save_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
- ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL);
+ timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
+ timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -359,8 +446,8 @@ static void timer_save_state(struct arch_timer_context *ctx)
break;
case TIMER_PTIMER:
- ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
- ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL);
+ timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
+ timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTP_CTL);
@@ -429,14 +516,14 @@ static void timer_restore_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
- write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL);
+ write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL);
+ write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL);
+ write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
isb();
- write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL);
+ write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
BUG();
@@ -528,7 +615,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_load_nogic(vcpu);
}
- set_cntvoff(map.direct_vtimer->cntvoff);
+ set_cntvoff(timer_get_offset(map.direct_vtimer));
kvm_timer_unblocking(vcpu);
@@ -615,7 +702,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
}
}
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
+void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -639,8 +726,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- vcpu_vtimer(vcpu)->cnt_ctl = 0;
- vcpu_ptimer(vcpu)->cnt_ctl = 0;
+ timer_set_ctl(vcpu_vtimer(vcpu), 0);
+ timer_set_ctl(vcpu_ptimer(vcpu), 0);
if (timer->enabled) {
kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
@@ -668,13 +755,13 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, tmp, kvm)
- vcpu_vtimer(tmp)->cntvoff = cntvoff;
+ timer_set_offset(vcpu_vtimer(tmp), cntvoff);
/*
* When called from the vcpu create path, the CPU being created is not
* included in the loop above, so we just set it here as well.
*/
- vcpu_vtimer(vcpu)->cntvoff = cntvoff;
+ timer_set_offset(vcpu_vtimer(vcpu), cntvoff);
mutex_unlock(&kvm->lock);
}
@@ -684,9 +771,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ vtimer->vcpu = vcpu;
+ ptimer->vcpu = vcpu;
+
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
- ptimer->cntvoff = 0;
+ timer_set_offset(ptimer, 0);
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
@@ -704,9 +794,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
-
- vtimer->vcpu = vcpu;
- ptimer->vcpu = vcpu;
}
static void kvm_timer_init_interrupt(void *info)
@@ -756,10 +843,12 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
* UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
* regardless of ENABLE bit for our implementation convenience.
*/
+ u32 ctl = timer_get_ctl(timer);
+
if (!kvm_timer_compute_delta(timer))
- return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT;
- else
- return timer->cnt_ctl;
+ ctl |= ARCH_TIMER_CTRL_IT_STAT;
+
+ return ctl;
}
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
@@ -795,8 +884,8 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
switch (treg) {
case TIMER_REG_TVAL:
- val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
- val &= lower_32_bits(val);
+ val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
+ val = lower_32_bits(val);
break;
case TIMER_REG_CTL:
@@ -804,11 +893,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
break;
case TIMER_REG_CVAL:
- val = timer->cnt_cval;
+ val = timer_get_cval(timer);
break;
case TIMER_REG_CNT:
- val = kvm_phys_timer_read() - timer->cntvoff;
+ val = kvm_phys_timer_read() - timer_get_offset(timer);
break;
default:
@@ -842,15 +931,15 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
{
switch (treg) {
case TIMER_REG_TVAL:
- timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val;
+ timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
break;
case TIMER_REG_CTL:
- timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
+ timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
break;
case TIMER_REG_CVAL:
- timer->cnt_cval = val;
+ timer_set_cval(timer, val);
break;
default:
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 73e12869afe3..691d21e4c717 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void)
*/
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- int ret, cpu;
+ int ret;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
- kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
- if (!kvm->arch.last_vcpu_ran)
- return -ENOMEM;
-
- for_each_possible_cpu(cpu)
- *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
-
- ret = kvm_alloc_stage2_pgd(kvm);
+ ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
if (ret)
- goto out_fail_alloc;
+ return ret;
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
if (ret)
@@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
- /* Mark the initial VMID generation invalid */
- kvm->arch.vmid.vmid_gen = 0;
-
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
return ret;
out_free_stage2_pgd:
- kvm_free_stage2_pgd(kvm);
-out_fail_alloc:
- free_percpu(kvm->arch.last_vcpu_ran);
- kvm->arch.last_vcpu_ran = NULL;
+ kvm_free_stage2_pgd(&kvm->arch.mmu);
return ret;
}
@@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_vgic_destroy(kvm);
- free_percpu(kvm->arch.last_vcpu_ran);
- kvm->arch.last_vcpu_ran = NULL;
-
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_vcpu_destroy(kvm->vcpus[i]);
@@ -281,6 +265,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_arm_pvtime_vcpu_init(&vcpu->arch);
+ vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
@@ -336,16 +322,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm_s2_mmu *mmu;
int *last_ran;
- last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+ mmu = vcpu->arch.hw_mmu;
+ last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
/*
* We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness.
*/
if (*last_ran != vcpu->vcpu_id) {
- kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+ kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
*last_ran = vcpu->vcpu_id;
}
@@ -353,7 +341,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
- kvm_vcpu_load_sysregs(vcpu);
+ if (has_vhe())
+ kvm_vcpu_load_sysregs_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
@@ -371,7 +360,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_arch_vcpu_put_fp(vcpu);
- kvm_vcpu_put_sysregs(vcpu);
+ if (has_vhe())
+ kvm_vcpu_put_sysregs_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
@@ -468,7 +458,6 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid)
/**
* update_vmid - Update the vmid with a valid VMID for the current generation
- * @kvm: The guest that struct vmid belongs to
* @vmid: The stage-2 VMID information struct
*/
static void update_vmid(struct kvm_vmid *vmid)
@@ -680,7 +669,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
cond_resched();
- update_vmid(&vcpu->kvm->arch.vmid);
+ update_vmid(&vcpu->arch.hw_mmu->vmid);
check_vcpu_requests(vcpu);
@@ -729,13 +718,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
- if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
+ if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(&userspace_irqchip_in_use))
- kvm_timer_sync_hwstate(vcpu);
+ kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
preempt_enable();
@@ -750,11 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- if (has_vhe()) {
- ret = kvm_vcpu_run_vhe(vcpu);
- } else {
- ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
- }
+ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -784,7 +769,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* timer virtual interrupt state.
*/
if (static_branch_unlikely(&userspace_irqchip_in_use))
- kvm_timer_sync_hwstate(vcpu);
+ kvm_timer_sync_user(vcpu);
kvm_arch_vcpu_ctxsync_fp(vcpu);
@@ -1287,7 +1272,7 @@ static void cpu_init_hyp_mode(void)
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
- (unsigned long)kvm_ksym_ref(kvm_host_data));
+ (unsigned long)kvm_ksym_ref(&kvm_host_data));
pgd_ptr = kvm_mmu_get_httbr();
hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
@@ -1308,7 +1293,7 @@ static void cpu_init_hyp_mode(void)
*/
if (this_cpu_has_cap(ARM64_SSBS) &&
arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- kvm_call_hyp(__kvm_enable_ssbs);
+ kvm_call_hyp_nvhe(__kvm_enable_ssbs);
}
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index e329a36b2bee..3e081d556e81 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -85,7 +85,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
+ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
vcpu->arch.sve_max_vl);
@@ -109,12 +109,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
-
fpsimd_save_and_flush_cpu_state();
if (guest_has_sve)
- *guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
} else if (host_has_sve) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index aea43ec60f37..dfb5218137ca 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -101,19 +101,69 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
return size;
}
-static int validate_core_offset(const struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
+static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u64 off = core_reg_offset_from_id(reg->id);
int size = core_reg_size_from_offset(vcpu, off);
if (size < 0)
- return -EINVAL;
+ return NULL;
if (KVM_REG_SIZE(reg->id) != size)
- return -EINVAL;
+ return NULL;
- return 0;
+ switch (off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ off -= KVM_REG_ARM_CORE_REG(regs.regs[0]);
+ off /= 2;
+ return &vcpu->arch.ctxt.regs.regs[off];
+
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ return &vcpu->arch.ctxt.regs.sp;
+
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ return &vcpu->arch.ctxt.regs.pc;
+
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ return &vcpu->arch.ctxt.regs.pstate;
+
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1);
+
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1);
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]):
+ return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1);
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]):
+ return &vcpu->arch.ctxt.spsr_abt;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]):
+ return &vcpu->arch.ctxt.spsr_und;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]):
+ return &vcpu->arch.ctxt.spsr_irq;
+
+ case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]):
+ return &vcpu->arch.ctxt.spsr_fiq;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]);
+ off /= 4;
+ return &vcpu->arch.ctxt.fp_regs.vregs[off];
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ return &vcpu->arch.ctxt.fp_regs.fpsr;
+
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ return &vcpu->arch.ctxt.fp_regs.fpcr;
+
+ default:
+ return NULL;
+ }
}
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -125,8 +175,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
* off the index in the "array".
*/
__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
- struct kvm_regs *regs = vcpu_gp_regs(vcpu);
- int nr_regs = sizeof(*regs) / sizeof(__u32);
+ int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
+ void *addr;
u32 off;
/* Our ID is an index into the kvm_regs struct. */
@@ -135,10 +185,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(vcpu, reg))
+ addr = core_reg_addr(vcpu, reg);
+ if (!addr)
return -EINVAL;
- if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+ if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
return 0;
@@ -147,10 +198,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
- struct kvm_regs *regs = vcpu_gp_regs(vcpu);
- int nr_regs = sizeof(*regs) / sizeof(__u32);
+ int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
__uint128_t tmp;
- void *valp = &tmp;
+ void *valp = &tmp, *addr;
u64 off;
int err = 0;
@@ -160,7 +210,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(vcpu, reg))
+ addr = core_reg_addr(vcpu, reg);
+ if (!addr)
return -EINVAL;
if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
@@ -198,7 +249,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
}
}
- memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+ memcpy(addr, valp, KVM_REG_SIZE(reg->id));
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 1df3beafd73f..fe6c7d79309d 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -89,7 +89,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+ if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
@@ -119,13 +119,13 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int ret = 0;
run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.hsr = hsr;
+ run->debug.arch.hsr = esr;
- switch (ESR_ELx_EC(hsr)) {
+ switch (ESR_ELx_EC(esr)) {
case ESR_ELx_EC_WATCHPT_LOW:
run->debug.arch.far = vcpu->arch.fault.far_el2;
/* fall through */
@@ -135,8 +135,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
case ESR_ELx_EC_BRK64:
break;
default:
- kvm_err("%s: un-handled case hsr: %#08x\n",
- __func__, (unsigned int) hsr);
+ kvm_err("%s: un-handled case esr: %#08x\n",
+ __func__, (unsigned int) esr);
ret = -1;
break;
}
@@ -146,10 +146,10 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
- kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
+ kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
+ esr, esr_get_class_string(esr));
kvm_inject_undefined(vcpu);
return 1;
@@ -200,10 +200,10 @@ static exit_handle_fn arm_exit_handlers[] = {
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
{
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
- u8 hsr_ec = ESR_ELx_EC(hsr);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
+ u8 esr_ec = ESR_ELx_EC(esr);
- return arm_exit_handlers[hsr_ec];
+ return arm_exit_handlers[esr_ec];
}
/*
@@ -242,15 +242,15 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
struct kvm_run *run = vcpu->run;
if (ARM_SERROR_PENDING(exception_index)) {
- u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
+ u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
/*
* HVC/SMC already have an adjusted PC, which we need
* to correct in order to return to after having
* injected the SError.
*/
- if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 ||
- hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) {
+ if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 ||
+ esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) {
u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
*vcpu_pc(vcpu) -= adj;
}
@@ -307,5 +307,5 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
exception_index = ARM_EXCEPTION_CODE(exception_index);
if (exception_index == ARM_EXCEPTION_EL1_SERROR)
- kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
+ kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
}
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 8c9880783839..f54f0e89a71c 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,18 +3,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
- $(DISABLE_STACKLEAK_PLUGIN)
-
-obj-$(CONFIG_KVM) += hyp.o
-
-hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \
- debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o
-
-# KVM code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+incdir := $(srctree)/$(src)/include
+subdir-asflags-y := -I$(incdir)
+subdir-ccflags-y := -I$(incdir) \
+ -fno-stack-protector \
+ -DDISABLE_BRANCH_PROFILING \
+ $(DISABLE_STACKLEAK_PLUGIN)
+
+obj-$(CONFIG_KVM) += vhe/ nvhe/
+obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 25c0e47d57cb..ae56d8a4b382 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -44,14 +44,14 @@ static const unsigned short cc_map[16] = {
/*
* Check if a trapped instruction should have been executed or not.
*/
-bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
{
unsigned long cpsr;
u32 cpsr_cond;
int cond;
/* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_hsr(vcpu) >> 30)
+ if (kvm_vcpu_get_esr(vcpu) >> 30)
return true;
/* Is condition field valid? */
@@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
*
* IT[7:0] -> CPSR[26:25],CPSR[15:10]
*/
-static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
{
unsigned long itbits, cond;
unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
* kvm_skip_instr - skip a trapped instruction and proceed to the next
* @vcpu: The vcpu pointer
*/
-void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
{
u32 pc = *vcpu_pc(vcpu);
bool is_thumb;
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 90186cf6473e..ee32a7743389 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -16,12 +16,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_ptrauth.h>
-#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
-#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8)
.text
- .pushsection .hyp.text, "ax"
/*
* We treat x18 as callee-saved as the host may use it as a platform
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 5b8ff517ff10..01f114aa47b0 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -9,7 +9,6 @@
#include <asm/fpsimdmacros.h>
.text
- .pushsection .hyp.text, "ax"
SYM_FUNC_START(__fpsimd_save_state)
fpsimd_save x0, 1
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9c5cfb04170e..689fccbc9de7 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -16,7 +16,6 @@
#include <asm/mmu.h>
.text
- .pushsection .hyp.text, "ax"
.macro do_el2_call
/*
@@ -40,6 +39,7 @@ el1_sync: // Guest trapped into EL2
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
+#ifdef __KVM_NVHE_HYPERVISOR__
mrs x1, vttbr_el2 // If vttbr is valid, the guest
cbnz x1, el1_hvc_guest // called HVC
@@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2
eret
sb
+#endif /* __KVM_NVHE_HYPERVISOR__ */
el1_hvc_guest:
/*
@@ -180,6 +181,7 @@ el2_error:
eret
sb
+#ifdef __KVM_NVHE_HYPERVISOR__
SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
@@ -189,6 +191,7 @@ SYM_FUNC_START(__hyp_do_panic)
eret
sb
SYM_FUNC_END(__hyp_do_panic)
+#endif
SYM_CODE_START(__hyp_panic)
get_host_ctxt x0, x1
@@ -318,20 +321,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs)
1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
.org 1b
SYM_CODE_END(__bp_harden_hyp_vecs)
-
- .popsection
-
-SYM_CODE_START(__smccc_workaround_1_smc)
- esb
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- smc #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
- .org 1b
-SYM_CODE_END(__smccc_workaround_1_smc)
#endif
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index e95af204fec7..0297dc63988c 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -4,6 +4,9 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__
+#define __ARM64_KVM_HYP_DEBUG_SR_H__
+
#include <linux/compiler.h>
#include <linux/kvm_host.h>
@@ -85,53 +88,8 @@
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
-{
- u64 reg;
-
- /* Clear pmscr in case of early return */
- *pmscr_el1 = 0;
-
- /* SPE present on this CPU? */
- if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
- ID_AA64DFR0_PMSVER_SHIFT))
- return;
-
- /* Yes; is it owned by EL3? */
- reg = read_sysreg_s(SYS_PMBIDR_EL1);
- if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
- return;
-
- /* No; is the host actually using the thing? */
- reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
- if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
- return;
-
- /* Yes; save the control register and disable data generation */
- *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
- write_sysreg_s(0, SYS_PMSCR_EL1);
- isb();
-
- /* Now drain all buffered data to memory */
- psb_csync();
- dsb(nsh);
-}
-
-static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
-{
- if (!pmscr_el1)
- return;
-
- /* The host page table is installed, but not yet synchronised */
- isb();
-
- /* Re-enable data generation */
- write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
-}
-
-static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
save_debug(dbg->dbg_wcr, dbgwcr, wrps);
save_debug(dbg->dbg_wvr, dbgwvr, wrps);
- ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+ ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1);
}
-static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
@@ -165,23 +122,16 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
- write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1);
}
-void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- /*
- * Non-VHE: Disable and flush SPE data generation
- * VHE: The vcpu can run, but it can't hide.
- */
- if (!has_vhe())
- __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
@@ -190,20 +140,17 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, host_dbg, host_ctxt);
- __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_save_state(host_dbg, host_ctxt);
+ __debug_restore_state(guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
+static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!has_vhe())
- __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
-
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
@@ -212,13 +159,10 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
- __debug_save_state(vcpu, guest_dbg, guest_ctxt);
- __debug_restore_state(vcpu, host_dbg, host_ctxt);
+ __debug_save_state(guest_dbg, guest_ctxt);
+ __debug_restore_state(host_dbg, host_ctxt);
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
-u32 __hyp_text __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
+#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
new file mode 100644
index 000000000000..426ef65601dd
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SWITCH_H__
+#define __ARM64_KVM_HYP_SWITCH_H__
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+extern const char __hyp_panic_string[];
+
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When the system doesn't support FP/SIMD, we cannot rely on
+ * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+ * abort on the very first access to FP and thus we should never
+ * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+ * trap the accesses.
+ */
+ if (!system_supports_fpsimd() ||
+ vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
+
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+}
+
+/* Save the 32-bit only FPSIMD system register state */
+static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
+}
+
+static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
+ */
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+ write_sysreg(1 << 30, fpexc32_el2);
+ isb();
+ }
+}
+
+static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+ write_sysreg(1 << 15, hstr_el2);
+
+ /*
+ * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+ * PMSELR_EL0 to make sure it never contains the cycle
+ * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+ * EL1 instead of being trapped to EL2.
+ */
+ write_sysreg(0, pmselr_el0);
+ write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static inline void __deactivate_traps_common(void)
+{
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
+
+static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+}
+
+static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If we pended a virtual abort, preserve it until it gets
+ * cleared. See D1.14.3 (Virtual Interrupts) for details, but
+ * the crucial bit is "On taking a vSError interrupt,
+ * HCR_EL2.VSE is cleared to 0."
+ */
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
+}
+
+static inline void __activate_vm(struct kvm_s2_mmu *mmu)
+{
+ __load_guest_stage2(mmu);
+}
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg(par_el1);
+ asm volatile("at s1e1r, %0" : : "r" (far));
+ isb();
+
+ tmp = read_sysreg(par_el1);
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u8 ec;
+ u64 esr;
+ u64 hpfar, far;
+
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ vcpu->arch.fault.far_el2 = far;
+ vcpu->arch.fault.hpfar_el2 = hpfar;
+ return true;
+}
+
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+{
+ bool vhe, sve_guest, sve_host;
+ u8 esr_ec;
+
+ if (!system_supports_fpsimd())
+ return false;
+
+ /*
+ * Currently system_supports_sve() currently implies has_vhe(),
+ * so the check is redundant. However, has_vhe() can be determined
+ * statically and helps the compiler remove dead code.
+ */
+ if (has_vhe() && system_supports_sve()) {
+ sve_guest = vcpu_has_sve(vcpu);
+ sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+ vhe = true;
+ } else {
+ sve_guest = false;
+ sve_host = false;
+ vhe = has_vhe();
+ }
+
+ esr_ec = kvm_vcpu_trap_get_class(vcpu);
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
+ esr_ec != ESR_ELx_EC_SVE)
+ return false;
+
+ /* Don't handle SVE traps for non-SVE vcpus here: */
+ if (!sve_guest)
+ if (esr_ec != ESR_ELx_EC_FP_ASIMD)
+ return false;
+
+ /* Valid trap. Switch the context: */
+
+ if (vhe) {
+ u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+ if (sve_guest)
+ reg |= CPACR_EL1_ZEN;
+
+ write_sysreg(reg, cpacr_el1);
+ } else {
+ write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+ cptr_el2);
+ }
+
+ isb();
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+ /*
+ * In the SVE case, VHE is assumed: it is enforced by
+ * Kconfig and kvm_arch_init().
+ */
+ if (sve_host) {
+ struct thread_struct *thread = container_of(
+ vcpu->arch.host_fpsimd_state,
+ struct thread_struct, uw.fpsimd_state);
+
+ sve_save_state(sve_pffr(thread),
+ &vcpu->arch.host_fpsimd_state->fpsr);
+ } else {
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ }
+
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ }
+
+ if (sve_guest) {
+ sve_load_state(vcpu_sve_pffr(vcpu),
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
+ write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
+ } else {
+ __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ }
+
+ /* Skip restoring fpexc32 for AArch64 guests */
+ if (!(read_sysreg(hcr_el2) & HCR_RW))
+ write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
+
+ vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+ return true;
+}
+
+static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
+static inline bool esr_is_ptrauth_trap(u32 esr)
+{
+ u32 ec = ESR_ELx_EC(esr);
+
+ if (ec == ESR_ELx_EC_PAC)
+ return true;
+
+ if (ec != ESR_ELx_EC_SYS64)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_APIAKEYLO_EL1:
+ case SYS_APIAKEYHI_EL1:
+ case SYS_APIBKEYLO_EL1:
+ case SYS_APIBKEYHI_EL1:
+ case SYS_APDAKEYLO_EL1:
+ case SYS_APDAKEYHI_EL1:
+ case SYS_APDBKEYLO_EL1:
+ case SYS_APDBKEYHI_EL1:
+ case SYS_APGAKEYLO_EL1:
+ case SYS_APGAKEYHI_EL1:
+ return true;
+ }
+
+ return false;
+}
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+} while(0)
+
+static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *ctxt;
+ u64 val;
+
+ if (!vcpu_has_ptrauth(vcpu) ||
+ !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return false;
+
+ ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __ptrauth_save_key(ctxt, APIA);
+ __ptrauth_save_key(ctxt, APIB);
+ __ptrauth_save_key(ctxt, APDA);
+ __ptrauth_save_key(ctxt, APDB);
+ __ptrauth_save_key(ctxt, APGA);
+
+ vcpu_ptrauth_enable(vcpu);
+
+ val = read_sysreg(hcr_el2);
+ val |= (HCR_API | HCR_APK);
+ write_sysreg(val, hcr_el2);
+
+ return true;
+}
+
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+ /*
+ * We're using the raw exception code in order to only process
+ * the trap if no SError is pending. We will come back to the
+ * same PC once the SError has been injected, and replay the
+ * trapping instruction.
+ */
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ * Similarly for trapped SVE accesses.
+ */
+ if (__hyp_handle_fpsimd(vcpu))
+ return true;
+
+ if (__hyp_handle_ptrauth(vcpu))
+ return true;
+
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
+ kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_dabt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+
+ goto exit;
+ }
+ }
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+ }
+
+exit:
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_final_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
+#endif /* __ARM64_KVM_HYP_SWITCH_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
new file mode 100644
index 000000000000..7a986030145f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__
+#define __ARM64_KVM_HYP_SYSREG_SR_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+}
+
+static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0);
+ ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
+}
+
+static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
+ ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
+ ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
+ ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
+ ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
+ ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
+ ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
+ ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
+ ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
+ ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR);
+ ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR);
+ ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR);
+ ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
+ ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
+ ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
+ ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1);
+ ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
+ ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
+ ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+}
+
+static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+}
+
+static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+}
+
+static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
+}
+
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1);
+
+ if (has_vhe() ||
+ !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with nVHE's __activate_traps().
+ */
+ write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
+ write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
+ write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+
+ if (!has_vhe() &&
+ cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with nVHE's __deactivate_traps().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+
+ write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+}
+
+static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
+ u64 pstate = ctxt->regs.pstate;
+ u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+ /*
+ * Safety check to ensure we're setting the CPU up to enter the guest
+ * in a less privileged mode.
+ *
+ * If we are attempting a return to EL2 or higher in AArch64 state,
+ * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+ * we'll take an illegal exception state exception immediately after
+ * the ERET to the guest. Attempts to return to AArch32 Hyp will
+ * result in an illegal exception return because EL2's execution state
+ * is determined by SCR_EL3.RW.
+ */
+ if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+ pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
+ write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
+ write_sysreg_el2(pstate, SYS_SPSR);
+
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+}
+
+static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt);
+ vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und);
+ vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq);
+ vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq);
+
+ __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
+ __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
+}
+
+static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
+
+ write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt);
+ write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und);
+ write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq);
+ write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
+ write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
+
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+ write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
+}
+
+#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
new file mode 100644
index 000000000000..aef76487edc2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_NVHE_HYPERVISOR__
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
+
+obj-y := $(patsubst %.o,%.hyp.o,$(obj-y))
+extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y))
+
+$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE
+ $(call if_changed_rule,as_o_S)
+$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE
+ $(call if_changed,hypcopy)
+
+# Disable reordering functions by GCC (enabled at -O2).
+# This pass puts functions into '.text.*' sections to aid the linker
+# in optimizing ELF layout. See HYPCOPY comment below for more info.
+ccflags-y += $(call cc-option,-fno-reorder-functions)
+
+# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
+# and relevant ELF section names to avoid clashes with VHE code/data.
+#
+# Hyp code is assumed to be in the '.text' section of the input object
+# files (with the exception of specialized sections such as
+# '.hyp.idmap.text'). This assumption may be broken by a compiler that
+# divides code into sections like '.text.unlikely' so as to optimize
+# ELF layout. HYPCOPY checks that no such sections exist in the input
+# using `objdump`, otherwise they would be linked together with other
+# kernel code and not memory-mapped correctly at runtime.
+quiet_cmd_hypcopy = HYPCOPY $@
+ cmd_hypcopy = \
+ if $(OBJDUMP) -h $< | grep -F '.text.'; then \
+ echo "$@: function reordering not supported in nVHE hyp code" >&2; \
+ /bin/false; \
+ fi; \
+ $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \
+ --rename-section=.text=.hyp.text \
+ $< $@
+
+# Remove ftrace and Shadow Call Stack CFLAGS.
+# This is equivalent to the 'notrace' and '__noscs' annotations.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
+# KVM nVHE code is run at a different exception code with a different map, so
+# compiler instrumentation that inserts callbacks or checks into the code may
+# cause crashes. Just disable it.
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+# Skip objtool checking for this directory because nVHE code is compiled with
+# non-standard build rules.
+OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
new file mode 100644
index 000000000000..91a711aa8382
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static void __debug_save_spe(u64 *pmscr_el1)
+{
+ u64 reg;
+
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
+ /* SPE present on this CPU? */
+ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+ ID_AA64DFR0_PMSVER_SHIFT))
+ return;
+
+ /* Yes; is it owned by EL3? */
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
+ return;
+
+ /* No; is the host actually using the thing? */
+ reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
+ return;
+
+ /* Yes; save the control register and disable data generation */
+ *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+ write_sysreg_s(0, SYS_PMSCR_EL1);
+ isb();
+
+ /* Now drain all buffered data to memory */
+ psb_csync();
+ dsb(nsh);
+}
+
+static void __debug_restore_spe(u64 pmscr_el1)
+{
+ if (!pmscr_el1)
+ return;
+
+ /* The host page table is installed, but not yet synchronised */
+ isb();
+
+ /* Re-enable data generation */
+ write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ /* Disable and flush SPE data generation */
+ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 86971fe26f3d..d9434e90c06d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -105,6 +105,11 @@ alternative_else_nop_endif
*/
mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ orr x4, x4, x5
+alternative_else_nop_endif
msr sctlr_el2, x4
isb
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
new file mode 100644
index 000000000000..341be2f2f312
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+#include <hyp/sysreg-sr.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
+ if (!update_fp_enabled(vcpu)) {
+ val |= CPTR_EL2_TFP;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cptr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR);
+ }
+}
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 mdcr_el2;
+
+ ___deactivate_traps(vcpu);
+
+ mdcr_el2 = read_sysreg(mdcr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
+ __deactivate_traps_common();
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+ mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+ write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static void __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
+ }
+}
+
+/**
+ * Disable host events, enable guest events
+ */
+static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+ return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool pmu_switch_needed;
+ u64 exit_code;
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ */
+ if (system_uses_irq_prio_masking()) {
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ pmr_sync();
+ }
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ __sysreg_save_state_nvhe(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+ __timer_disable_traps(vcpu);
+ __hyp_vgic_save_state(vcpu);
+
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+
+ __sysreg_restore_state_nvhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ /*
+ * This must come after restoring the host sysregs, since a non-VHE
+ * system may enable SPE here and make use of the TTBRs.
+ */
+ __debug_switch_to_host(vcpu);
+
+ if (pmu_switch_needed)
+ __pmu_switch_to_host(host_ctxt);
+
+ /* Returning to host will clear PSR.I, remask PMR if needed */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQOFF);
+
+ return exit_code;
+}
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+ struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
+ unsigned long str_va;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(host_ctxt);
+ }
+
+ /*
+ * Force the panic string to be loaded from the literal pool,
+ * making sure it is a kernel address and not a PC-relative
+ * reference.
+ */
+ asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
+
+ __hyp_do_panic(str_va,
+ spsr, elr,
+ read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+ unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
new file mode 100644
index 000000000000..88a25fc8fcd3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * Non-VHE: Both host and guest must save everything.
+ */
+
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+
+void __kvm_enable_ssbs(void)
+{
+ u64 tmp;
+
+ asm volatile(
+ "mrs %0, sctlr_el2\n"
+ "orr %0, %0, %1\n"
+ "msr sctlr_el2, %0"
+ : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
+}
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index fb5c0be33223..9072e71693ba 100644
--- a/arch/arm64/kvm/hyp/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -10,7 +10,7 @@
#include <asm/kvm_hyp.h>
-void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
+void __kvm_timer_set_cntvoff(u64 cntvoff)
{
write_sysreg(cntvoff, cntvoff_el2);
}
@@ -19,7 +19,7 @@ void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
* Should only be called on non-VHE systems.
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
*/
-void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
+void __timer_disable_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
* Should only be called on non-VHE systems.
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
*/
-void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
+void __timer_enable_traps(struct kvm_vcpu *vcpu)
{
u64 val;
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
new file mode 100644
index 000000000000..69eae608d670
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ u64 tcr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
+ __load_guest_stage2(mmu);
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /*
+ * If the host is running at EL1 and we have a VPIPT I-cache,
+ * then we must perform I-cache maintenance at EL2 in order for
+ * it to have an effect on the guest. Since the guest cannot hit
+ * I-cache lines allocated with a different VMID, we don't need
+ * to worry about junk out of guest reset (we nuke the I-cache on
+ * VMID rollover), but we do need to be careful when remapping
+ * executable pages for the same guest. This can happen when KSM
+ * takes a CoW fault on an executable page, copies the page into
+ * a page that was previously mapped in the guest and then needs
+ * to invalidate the guest view of the I-cache for that page
+ * from EL1. To solve this, we invalidate the entire I-cache when
+ * unmapping a page from a guest if we have a VPIPT I-cache but
+ * the host is running at EL1. As above, we could do better if
+ * we had the VA.
+ *
+ * The moral of this story is: if you have a VPIPT I-cache, then
+ * you should be running with VHE enabled.
+ */
+ if (icache_is_vpipt())
+ __flush_icache_all();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ mmu = kern_hyp_va(mmu);
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S
new file mode 100644
index 000000000000..b0441dbdf68b
--- /dev/null
+++ b/arch/arm64/kvm/hyp/smccc_wa.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2018 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/linkage.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+ /*
+ * This is not executed directly and is instead copied into the vectors
+ * by install_bp_hardening_cb().
+ */
+ .data
+ .pushsection .rodata
+ .global __smccc_workaround_1_smc
+SYM_DATA_START(__smccc_workaround_1_smc)
+ esb
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
+ .org 1b
+SYM_DATA_END(__smccc_workaround_1_smc)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
deleted file mode 100644
index db1c4487d95d..000000000000
--- a/arch/arm64/kvm/hyp/switch.c
+++ /dev/null
@@ -1,936 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <uapi/linux/psci.h>
-
-#include <kvm/arm_psci.h>
-
-#include <asm/barrier.h>
-#include <asm/cpufeature.h>
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/fpsimd.h>
-#include <asm/debug-monitors.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
-{
- /*
- * When the system doesn't support FP/SIMD, we cannot rely on
- * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
- * abort on the very first access to FP and thus we should never
- * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
- * trap the accesses.
- */
- if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
- KVM_ARM64_FP_HOST);
-
- return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
-}
-
-/* Save the 32-bit only FPSIMD system register state */
-static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
-{
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-}
-
-static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
-{
- /*
- * We are about to set CPTR_EL2.TFP to trap all floating point
- * register accesses to EL2, however, the ARM ARM clearly states that
- * traps are only taken to EL2 if the operation would not otherwise
- * trap to EL1. Therefore, always make sure that for 32-bit guests,
- * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
- * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
- * it will cause an exception.
- */
- if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
- write_sysreg(1 << 30, fpexc32_el2);
- isb();
- }
-}
-
-static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
-{
- /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
- write_sysreg(1 << 15, hstr_el2);
-
- /*
- * Make sure we trap PMU access from EL0 to EL2. Also sanitize
- * PMSELR_EL0 to make sure it never contains the cycle
- * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
- * EL1 instead of being trapped to EL2.
- */
- write_sysreg(0, pmselr_el0);
- write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-}
-
-static void __hyp_text __deactivate_traps_common(void)
-{
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
-}
-
-static void activate_traps_vhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_ZEN;
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
-
- val |= CPTR_EL2_TAM;
-
- if (update_fp_enabled(vcpu)) {
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN;
- } else {
- val &= ~CPACR_EL1_FPEN;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-NOKPROBE_SYMBOL(activate_traps_vhe);
-
-static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- __activate_traps_common(vcpu);
-
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
- if (!update_fp_enabled(vcpu)) {
- val |= CPTR_EL2_TFP;
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cptr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
-
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * configured and enabled. We can now restore the guest's S1
- * configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-}
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
-{
- u64 hcr = vcpu->arch.hcr_el2;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
- hcr |= HCR_TVM;
-
- write_sysreg(hcr, hcr_el2);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- if (has_vhe())
- activate_traps_vhe(vcpu);
- else
- __activate_traps_nvhe(vcpu);
-}
-
-static void deactivate_traps_vhe(void)
-{
- extern char vectors[]; /* kernel exception vectors */
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
- /*
- * ARM errata 1165522 and 1530923 require the actual execution of the
- * above before we can switch to the EL2/EL0 translation regime used by
- * the host.
- */
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
-
- write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
- write_sysreg(vectors, vbar_el1);
-}
-NOKPROBE_SYMBOL(deactivate_traps_vhe);
-
-static void __hyp_text __deactivate_traps_nvhe(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- u64 val;
-
- /*
- * Set the TCR and SCTLR registers in the exact opposite
- * sequence as __activate_traps_nvhe (first prevent walks,
- * then force the MMU on). A generous sprinkling of isb()
- * ensure that things happen in this exact order.
- */
- val = read_sysreg_el1(SYS_TCR);
- write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
- isb();
- val = read_sysreg_el1(SYS_SCTLR);
- write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
- isb();
- }
-
- __deactivate_traps_common();
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK;
- mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
-
- write_sysreg(mdcr_el2, mdcr_el2);
- write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
- write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
-}
-
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-{
- /*
- * If we pended a virtual abort, preserve it until it gets
- * cleared. See D1.14.3 (Virtual Interrupts) for details, but
- * the crucial bit is "On taking a vSError interrupt,
- * HCR_EL2.VSE is cleared to 0."
- */
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
- }
-
- if (has_vhe())
- deactivate_traps_vhe();
- else
- __deactivate_traps_nvhe();
-}
-
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
-{
- __activate_traps_common(vcpu);
-}
-
-void deactivate_traps_vhe_put(void)
-{
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
-
- write_sysreg(mdcr_el2, mdcr_el2);
-
- __deactivate_traps_common();
-}
-
-static void __hyp_text __activate_vm(struct kvm *kvm)
-{
- __load_guest_stage2(kvm);
-}
-
-static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-{
- write_sysreg(0, vttbr_el2);
-}
-
-/* Save VGICv3 state on non-VHE systems */
-static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
- __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
- }
-}
-
-/* Restore VGICv3 state on non_VEH systems */
-static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
-{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
- __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
- __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
- }
-}
-
-static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg(par_el1);
- asm volatile("at s1e1r, %0" : : "r" (far));
- isb();
-
- tmp = read_sysreg(par_el1);
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
-{
- u8 ec;
- u64 esr;
- u64 hpfar, far;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- vcpu->arch.fault.far_el2 = far;
- vcpu->arch.fault.hpfar_el2 = hpfar;
- return true;
-}
-
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
-{
- bool vhe, sve_guest, sve_host;
- u8 hsr_ec;
-
- if (!system_supports_fpsimd())
- return false;
-
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- vhe = true;
- } else {
- sve_guest = false;
- sve_host = false;
- vhe = has_vhe();
- }
-
- hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
- hsr_ec != ESR_ELx_EC_SVE)
- return false;
-
- /* Don't handle SVE traps for non-SVE vcpus here: */
- if (!sve_guest)
- if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
- return false;
-
- /* Valid trap. Switch the context: */
-
- if (vhe) {
- u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
-
- if (sve_guest)
- reg |= CPACR_EL1_ZEN;
-
- write_sysreg(reg, cpacr_el1);
- } else {
- write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
- cptr_el2);
- }
-
- isb();
-
- if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- /*
- * In the SVE case, VHE is assumed: it is enforced by
- * Kconfig and kvm_arch_init().
- */
- if (sve_host) {
- struct thread_struct *thread = container_of(
- vcpu->arch.host_fpsimd_state,
- struct thread_struct, uw.fpsimd_state);
-
- sve_save_state(sve_pffr(thread),
- &vcpu->arch.host_fpsimd_state->fpsr);
- } else {
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
- }
-
- vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
- }
-
- if (sve_guest) {
- sve_load_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
- sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
- write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
- } else {
- __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
- }
-
- /* Skip restoring fpexc32 for AArch64 guests */
- if (!(read_sysreg(hcr_el2) & HCR_RW))
- write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
- fpexc32_el2);
-
- vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
-
- return true;
-}
-
-static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
-{
- u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
- int rt = kvm_vcpu_sys_get_rt(vcpu);
- u64 val = vcpu_get_reg(vcpu, rt);
-
- /*
- * The normal sysreg handling code expects to see the traps,
- * let's not do anything here.
- */
- if (vcpu->arch.hcr_el2 & HCR_TVM)
- return false;
-
- switch (sysreg) {
- case SYS_SCTLR_EL1:
- write_sysreg_el1(val, SYS_SCTLR);
- break;
- case SYS_TTBR0_EL1:
- write_sysreg_el1(val, SYS_TTBR0);
- break;
- case SYS_TTBR1_EL1:
- write_sysreg_el1(val, SYS_TTBR1);
- break;
- case SYS_TCR_EL1:
- write_sysreg_el1(val, SYS_TCR);
- break;
- case SYS_ESR_EL1:
- write_sysreg_el1(val, SYS_ESR);
- break;
- case SYS_FAR_EL1:
- write_sysreg_el1(val, SYS_FAR);
- break;
- case SYS_AFSR0_EL1:
- write_sysreg_el1(val, SYS_AFSR0);
- break;
- case SYS_AFSR1_EL1:
- write_sysreg_el1(val, SYS_AFSR1);
- break;
- case SYS_MAIR_EL1:
- write_sysreg_el1(val, SYS_MAIR);
- break;
- case SYS_AMAIR_EL1:
- write_sysreg_el1(val, SYS_AMAIR);
- break;
- case SYS_CONTEXTIDR_EL1:
- write_sysreg_el1(val, SYS_CONTEXTIDR);
- break;
- default:
- return false;
- }
-
- __kvm_skip_instr(vcpu);
- return true;
-}
-
-static bool __hyp_text esr_is_ptrauth_trap(u32 esr)
-{
- u32 ec = ESR_ELx_EC(esr);
-
- if (ec == ESR_ELx_EC_PAC)
- return true;
-
- if (ec != ESR_ELx_EC_SYS64)
- return false;
-
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(regs, key) \
-({ \
- regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
-})
-
-static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu) ||
- !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu)))
- return false;
-
- ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- __ptrauth_save_key(ctxt->sys_regs, APIA);
- __ptrauth_save_key(ctxt->sys_regs, APIB);
- __ptrauth_save_key(ctxt->sys_regs, APDA);
- __ptrauth_save_key(ctxt->sys_regs, APDB);
- __ptrauth_save_key(ctxt->sys_regs, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
-/*
- * Return true when we were able to fixup the guest exit and should return to
- * the guest, false when we should restore the host state and return to the
- * main run loop.
- */
-static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
- vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
-
- /*
- * We're using the raw exception code in order to only process
- * the trap if no SError is pending. We will come back to the
- * same PC once the SError has been injected, and replay the
- * trapping instruction.
- */
- if (*exit_code != ARM_EXCEPTION_TRAP)
- goto exit;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
- return true;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- return true;
-
- if (__hyp_handle_ptrauth(vcpu))
- return true;
-
- if (!__populate_fault_info(vcpu))
- return true;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_dabt_isextabt(vcpu) &&
- !kvm_vcpu_dabt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- return true;
- }
-
-exit:
- /* Return to the host kernel and handle the exit */
- return false;
-}
-
-static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
-{
- if (!cpus_have_final_cap(ARM64_SSBD))
- return false;
-
- return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
-}
-
-static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * The host runs with the workaround always present. If the
- * guest wants it disabled, so be it...
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-#endif
-}
-
-static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
- /*
- * If the guest has disabled the workaround, bring it back on.
- */
- if (__needs_ssbd_off(vcpu) &&
- __hyp_this_cpu_read(arm64_ssbd_callback_required))
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-#endif
-}
-
-/**
- * Disable host events, enable guest events
- */
-static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenclr_el0);
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenset_el0);
-
- return (pmu->events_host || pmu->events_guest);
-}
-
-/**
- * Disable guest events, enable host events
- */
-static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
-
- if (pmu->events_guest)
- write_sysreg(pmu->events_guest, pmcntenclr_el0);
-
- if (pmu->events_host)
- write_sysreg(pmu->events_host, pmcntenset_el0);
-}
-
-/* Switch to the guest for VHE systems running in EL2 */
-static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- u64 exit_code;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- sysreg_save_host_state_vhe(host_ctxt);
-
- /*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs above. We must now call __activate_vm
- * before __activate_traps, because __activate_vm configures
- * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
- * (among other things).
- */
- __activate_vm(vcpu->kvm);
- __activate_traps(vcpu);
-
- sysreg_restore_guest_state_vhe(guest_ctxt);
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- sysreg_save_guest_state_vhe(guest_ctxt);
-
- __deactivate_traps(vcpu);
-
- sysreg_restore_host_state_vhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- __debug_switch_to_host(vcpu);
-
- return exit_code;
-}
-NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
-
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- local_daif_mask();
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-
- ret = __kvm_vcpu_run_vhe(vcpu);
-
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-
- return ret;
-}
-
-/* Switch to the guest for legacy non-VHE systems */
-int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool pmu_switch_needed;
- u64 exit_code;
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- */
- if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- pmr_sync();
- }
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
-
- __sysreg_save_state_nvhe(host_ctxt);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- *
- * Also, and in order to be able to deal with erratum #1319537 (A57)
- * and #1319367 (A72), we must ensure that all VM-related sysreg are
- * restored before we enable S2 translation.
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_state_nvhe(guest_ctxt);
-
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- __debug_switch_to_guest(vcpu);
-
- __set_guest_arch_workaround_state(vcpu);
-
- do {
- /* Jump in the fire! */
- exit_code = __guest_enter(vcpu, host_ctxt);
-
- /* And we're baaack! */
- } while (fixup_guest_exit(vcpu, &exit_code));
-
- __set_host_arch_workaround_state(vcpu);
-
- __sysreg_save_state_nvhe(guest_ctxt);
- __sysreg32_save_state(vcpu);
- __timer_disable_traps(vcpu);
- __hyp_vgic_save_state(vcpu);
-
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
-
- __sysreg_restore_state_nvhe(host_ctxt);
-
- if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
- __fpsimd_save_fpexc32(vcpu);
-
- /*
- * This must come after restoring the host sysregs, since a non-VHE
- * system may enable SPE here and make use of the TTBRs.
- */
- __debug_switch_to_host(vcpu);
-
- if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
-
- /* Returning to host will clear PSR.I, remask PMR if needed */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQOFF);
-
- return exit_code;
-}
-
-static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *__host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- unsigned long str_va;
-
- vcpu = __host_ctxt->__hyp_running_vcpu;
-
- if (read_sysreg(vttbr_el2)) {
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_state_nvhe(__host_ctxt);
- }
-
- /*
- * Force the panic string to be loaded from the literal pool,
- * making sure it is a kernel address and not a PC-relative
- * reference.
- */
- asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
-
- __hyp_do_panic(str_va,
- spsr, elr,
- read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-
-static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_cpu_context *host_ctxt)
-{
- struct kvm_vcpu *vcpu;
- vcpu = host_ctxt->__hyp_running_vcpu;
-
- __deactivate_traps(vcpu);
- sysreg_restore_host_state_vhe(host_ctxt);
-
- panic(__hyp_panic_string,
- spsr, elr,
- read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
- read_sysreg(hpfar_el2), par, vcpu);
-}
-NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
-{
- u64 spsr = read_sysreg_el2(SYS_SPSR);
- u64 elr = read_sysreg_el2(SYS_ELR);
- u64 par = read_sysreg(par_el1);
-
- if (!has_vhe())
- __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
- else
- __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
-
- unreachable();
-}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
deleted file mode 100644
index cc7e957f5b2c..000000000000
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ /dev/null
@@ -1,333 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kprobes.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-/*
- * Non-VHE: Both host and guest must save everything.
- *
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
- * pstate, which are handled as part of the el2 return state) on every
- * switch (sp_el0 is being dealt with in the assembly code).
- * tpidr_el0 and tpidrro_el0 only need to be switched when going
- * to host userspace or a different VCPU. EL1 registers only need to be
- * switched when potentially going to run a different VCPU. The latter two
- * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
- */
-
-static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
-}
-
-static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
- ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR);
- ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR);
- ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0);
- ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1);
- ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR);
- ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR);
- ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0);
- ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1);
- ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR);
- ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR);
- ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR);
- ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR);
- ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR);
- ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL);
- ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
-
- ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
- ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR);
- ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
-}
-
-static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_el1_state(ctxt);
- __sysreg_save_common_state(ctxt);
- __sysreg_save_user_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-
-void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
-
-void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_save_common_state(ctxt);
- __sysreg_save_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
-
-static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
-}
-
-static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
-}
-
-static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
-{
- write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
- write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
-
- if (has_vhe() ||
- !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- } else if (!ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for guest registers, hence the context
- * test. We're coming from the host, so SCTLR.M is already
- * set. Pairs with __activate_traps_nvhe().
- */
- write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
- TCR_EPD1_MASK | TCR_EPD0_MASK),
- SYS_TCR);
- isb();
- }
-
- write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
- write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
- write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
- write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
- write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
- write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR);
- write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR);
- write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR);
- write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
- write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR);
- write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL);
- write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
-
- if (!has_vhe() &&
- cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
- ctxt->__hyp_running_vcpu) {
- /*
- * Must only be done for host registers, hence the context
- * test. Pairs with __deactivate_traps_nvhe().
- */
- isb();
- /*
- * At this stage, and thanks to the above isb(), S2 is
- * deconfigured and disabled. We can now restore the host's
- * S1 configuration: SCTLR, and only then TCR.
- */
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
- isb();
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
- }
-
- write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
- write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
- write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
-}
-
-static void __hyp_text
-__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
-{
- u64 pstate = ctxt->gp_regs.regs.pstate;
- u64 mode = pstate & PSR_AA32_MODE_MASK;
-
- /*
- * Safety check to ensure we're setting the CPU up to enter the guest
- * in a less privileged mode.
- *
- * If we are attempting a return to EL2 or higher in AArch64 state,
- * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
- * we'll take an illegal exception state exception immediately after
- * the ERET to the guest. Attempts to return to AArch32 Hyp will
- * result in an illegal exception return because EL2's execution state
- * is determined by SCR_EL3.RW.
- */
- if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
- pstate = PSR_MODE_EL2h | PSR_IL_BIT;
-
- write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
- write_sysreg_el2(pstate, SYS_SPSR);
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
-}
-
-void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_el1_state(ctxt);
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_user_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-
-void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
-
-void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
-{
- __sysreg_restore_common_state(ctxt);
- __sysreg_restore_el2_return_state(ctxt);
-}
-NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
-
-void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
- spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
- spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
- spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
-
- sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
- sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
-}
-
-void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
-{
- u64 *spsr, *sysreg;
-
- if (!vcpu_el1_is_32bit(vcpu))
- return;
-
- spsr = vcpu->arch.ctxt.gp_regs.spsr;
- sysreg = vcpu->arch.ctxt.sys_regs;
-
- write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
- write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
- write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
- write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
-
- write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
- write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
-
- if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
- write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
-}
-
-/**
- * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Load system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_load() function
- * and loading system register state early avoids having to load them on
- * every entry to the VM.
- */
-void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
- struct kvm_cpu_context *host_ctxt;
-
- if (!has_vhe())
- return;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- __sysreg_save_user_state(host_ctxt);
-
- /*
- * Load guest EL1 and user state
- *
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = true;
-
- activate_traps_vhe_load(vcpu);
-}
-
-/**
- * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
- *
- * @vcpu: The VCPU pointer
- *
- * Save guest system registers that do not affect the host's execution, for
- * example EL1 system registers on a VHE system where the host kernel
- * runs at EL2. This function is called from KVM's vcpu_put() function
- * and deferring saving system register state until we're no longer running the
- * VCPU avoids having to save them on every exit from the VM.
- */
-void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
- struct kvm_cpu_context *host_ctxt;
-
- if (!has_vhe())
- return;
-
- host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
- deactivate_traps_vhe_put();
-
- __sysreg_save_el1_state(guest_ctxt);
- __sysreg_save_user_state(guest_ctxt);
- __sysreg32_save_state(vcpu);
-
- /* Restore host user state */
- __sysreg_restore_user_state(host_ctxt);
-
- vcpu->arch.sysregs_loaded_on_cpu = false;
-}
-
-void __hyp_text __kvm_enable_ssbs(void)
-{
- u64 tmp;
-
- asm volatile(
- "mrs %0, sctlr_el2\n"
- "orr %0, %0, %1\n"
- "msr sctlr_el2, %0"
- : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
-}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
deleted file mode 100644
index d063a576d511..000000000000
--- a/arch/arm64/kvm/hyp/tlb.c
+++ /dev/null
@@ -1,242 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/irqflags.h>
-
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-#include <asm/tlbflush.h>
-
-struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
-};
-
-static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- u64 val;
-
- local_irq_save(cxt->flags);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /*
- * For CPUs that are affected by ARM errata 1165522 or 1530923,
- * we cannot trust stage-1 to be in a correct state at that
- * point. Since we do not want to force a full load of the
- * vcpu state, we prevent the EL1 page-table walker to
- * allocate new TLBs. This is done by setting the EPD bits
- * in the TCR_EL1 register. We also need to prevent it to
- * allocate IPA->PA walks, so we enable the S1 MMU...
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
- val |= SCTLR_ELx_M;
- write_sysreg_el1(val, SYS_SCTLR);
- }
-
- /*
- * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
- * most TLB operations target EL2/EL0. In order to affect the
- * guest TLBs (EL1/EL0), we need to change one of these two
- * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
- * let's flip TGE before executing the TLB operation.
- *
- * ARM erratum 1165522 requires some special handling (again),
- * as we need to make sure both stages of translation are in
- * place before clearing TGE. __load_guest_stage2() already
- * has an ISB in order to deal with this.
- */
- __load_guest_stage2(kvm);
- val = read_sysreg(hcr_el2);
- val &= ~HCR_TGE;
- write_sysreg(val, hcr_el2);
- isb();
-}
-
-static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- u64 val;
-
- /*
- * For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
- */
- val = cxt->tcr = read_sysreg_el1(SYS_TCR);
- val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
- write_sysreg_el1(val, SYS_TCR);
- isb();
- }
-
- /* __load_guest_stage2() includes an ISB for the workaround. */
- __load_guest_stage2(kvm);
- asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
-}
-
-static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_guest_vhe(kvm, cxt);
- else
- __tlb_switch_to_guest_nvhe(kvm, cxt);
-}
-
-static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- /*
- * We're done with the TLB operation, let's restore the host's
- * view of HCR_EL2.
- */
- write_sysreg(0, vttbr_el2);
- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
- isb();
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Restore the registers to what they were */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
- }
-
- local_irq_restore(cxt->flags);
-}
-
-static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- write_sysreg(0, vttbr_el2);
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
- isb();
- /* Restore the host's TCR_EL1 */
- write_sysreg_el1(cxt->tcr, SYS_TCR);
- }
-}
-
-static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
- struct tlb_inv_context *cxt)
-{
- if (has_vhe())
- __tlb_switch_to_host_vhe(kvm, cxt);
- else
- __tlb_switch_to_host_nvhe(kvm, cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- /*
- * We could do so much better if we had the VA as well.
- * Instead, we invalidate Stage-2 for this IPA, and the
- * whole of Stage-1. Weep...
- */
- ipa >>= 12;
- __tlbi(ipas2e1is, ipa);
-
- /*
- * We have to ensure completion of the invalidation at Stage-2,
- * since a table walk on another CPU could refill a TLB with a
- * complete (S1 + S2) walk based on the old Stage-2 mapping if
- * the Stage-1 invalidation happened first.
- */
- dsb(ish);
- __tlbi(vmalle1is);
- dsb(ish);
- isb();
-
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (!has_vhe() && icache_is_vpipt())
- __flush_icache_all();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
-{
- struct tlb_inv_context cxt;
-
- dsb(ishst);
-
- /* Switch to requested VMID */
- kvm = kern_hyp_va(kvm);
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalls12e1is);
- dsb(ish);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
- struct tlb_inv_context cxt;
-
- /* Switch to requested VMID */
- __tlb_switch_to_guest(kvm, &cxt);
-
- __tlbi(vmalle1);
- dsb(nsh);
- isb();
-
- __tlb_switch_to_host(kvm, &cxt);
-}
-
-void __hyp_text __kvm_flush_vm_context(void)
-{
- dsb(ishst);
- __tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
- dsb(ish);
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 4f3a087e36d5..bd1bab551d48 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -13,7 +13,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
-static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+static bool __is_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
@@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
* 0: Not a GICV access
* -1: Illegal GICV access successfully performed
*/
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_dist *vgic = &kvm->arch.vgic;
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 10ed539835c1..5a0073511efb 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -16,7 +16,7 @@
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
-static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
+static u64 __gic_v3_get_lr(unsigned int lr)
{
switch (lr & 0xf) {
case 0:
@@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
unreachable();
}
-static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
+static void __gic_v3_set_lr(u64 val, int lr)
{
switch (lr & 0xf) {
case 0:
@@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
}
}
-static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
+static void __vgic_v3_write_ap0rn(u32 val, int n)
{
switch (n) {
case 0:
@@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
}
}
-static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
+static void __vgic_v3_write_ap1rn(u32 val, int n)
{
switch (n) {
case 0:
@@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
}
}
-static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
+static u32 __vgic_v3_read_ap0rn(int n)
{
u32 val;
@@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
return val;
}
-static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
+static u32 __vgic_v3_read_ap1rn(int n)
{
u32 val;
@@ -194,7 +194,7 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
return val;
}
-void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
@@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
int i;
@@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
{
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
@@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __hyp_text __vgic_v3_init_lrs(void)
+void __vgic_v3_init_lrs(void)
{
int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
int i;
@@ -403,30 +403,30 @@ void __hyp_text __vgic_v3_init_lrs(void)
__gic_v3_set_lr(0, i);
}
-u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void)
+u64 __vgic_v3_get_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
}
-u64 __hyp_text __vgic_v3_read_vmcr(void)
+u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
+void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-static int __hyp_text __vgic_v3_bpr_min(void)
+static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
}
-static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+static int __vgic_v3_get_group(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
return crm != 8;
@@ -434,9 +434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
#define GICv3_IDLE_PRIORITY 0xff
-static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
- u32 vmcr,
- u64 *lr_val)
+static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr,
+ u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
u8 priority = GICv3_IDLE_PRIORITY;
@@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
return lr;
}
-static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
- int intid, u64 *lr_val)
+static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid,
+ u64 *lr_val)
{
unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
int i;
@@ -494,7 +493,7 @@ static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
return -1;
}
-static int __hyp_text __vgic_v3_get_highest_active_priority(void)
+static int __vgic_v3_get_highest_active_priority(void)
{
u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
u32 hap = 0;
@@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void)
return GICv3_IDLE_PRIORITY;
}
-static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr)
+static unsigned int __vgic_v3_get_bpr0(u32 vmcr)
{
return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
}
-static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
+static unsigned int __vgic_v3_get_bpr1(u32 vmcr)
{
unsigned int bpr;
@@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
* Convert a priority to a preemption level, taking the relevant BPR
* into account by zeroing the sub-priority bits.
*/
-static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
{
unsigned int bpr;
@@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
* matter what the guest does with its BPR, we can always set/get the
* same value of a priority.
*/
-static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
{
u8 pre, ap;
u32 val;
@@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
}
}
-static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
+static int __vgic_v3_clear_highest_active_priority(void)
{
u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
u32 hap = 0;
@@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
return GICv3_IDLE_PRIORITY;
}
-static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 lr_val;
u8 lr_prio, pmr;
@@ -661,7 +660,7 @@ spurious:
vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
}
-static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+static void __vgic_v3_clear_active_lr(int lr, u64 lr_val)
{
lr_val &= ~ICH_LR_ACTIVE_BIT;
if (lr_val & ICH_LR_HW) {
@@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
__gic_v3_set_lr(lr_val, lr);
}
-static void __hyp_text __vgic_v3_bump_eoicount(void)
+static void __vgic_v3_bump_eoicount(void)
{
u32 hcr;
@@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void)
write_gicreg(hcr, ICH_HCR_EL2);
}
-static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@@ -707,7 +705,7 @@ static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
__vgic_v3_clear_active_lr(lr, lr_val);
}
-static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_clear_active_lr(lr, lr_val);
}
-static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
}
-static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
}
-static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
@@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr,
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
@@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr,
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
}
-static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
}
-static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
u8 bpr_min = __vgic_v3_bpr_min() - 1;
@@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 val = vcpu_get_reg(vcpu, rt);
u8 bpr_min = __vgic_v3_bpr_min();
@@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int
__vgic_v3_write_vmcr(vmcr);
}
-static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
{
u32 val;
@@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int
__vgic_v3_write_ap1rn(val, n);
}
-static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
+static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 0);
}
-static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
+static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 1);
}
-static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 2);
}
-static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_read_apxrn(vcpu, rt, 3);
}
-static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 0);
}
-static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 1);
}
-static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 2);
}
-static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
__vgic_v3_write_apxrn(vcpu, rt, 3);
}
-static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u64 lr_val;
int lr, lr_grp, grp;
@@ -915,16 +906,14 @@ spurious:
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
}
-static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
vmcr &= ICH_VMCR_PMR_MASK;
vmcr >>= ICH_VMCR_PMR_SHIFT;
vcpu_set_reg(vcpu, rt, vmcr);
}
-static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu,
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = __vgic_v3_get_highest_active_priority();
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vtr, val;
@@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu,
vcpu_set_reg(vcpu, rt, val);
}
-static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu,
- u32 vmcr, int rt)
+static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 val = vcpu_get_reg(vcpu, rt);
@@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu,
write_gicreg(vmcr, ICH_VMCR_EL2);
}
-int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
u32 esr;
@@ -992,7 +978,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
- esr = kvm_vcpu_get_hsr(vcpu);
+ esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
__kvm_skip_instr(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
new file mode 100644
index 000000000000..461e97c375cc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+#
+
+asflags-y := -D__KVM_VHE_HYPERVISOR__
+ccflags-y := -D__KVM_VHE_HYPERVISOR__
+
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
+obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+ ../fpsimd.o ../hyp-entry.o
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
new file mode 100644
index 000000000000..f1e2e5a00933
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/debug-sr.h>
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_hyp.h>
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_guest_common(vcpu);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ __debug_switch_to_host_common(vcpu);
+}
+
+u32 __kvm_get_mdcr_el2(void)
+{
+ return read_sysreg(mdcr_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
new file mode 100644
index 000000000000..c52d714e0d75
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/switch.h>
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ ___activate_traps(vcpu);
+
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~CPACR_EL1_ZEN;
+
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+
+ val |= CPTR_EL2_TAM;
+
+ if (update_fp_enabled(vcpu)) {
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ } else {
+ val &= ~CPACR_EL1_FPEN;
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+NOKPROBE_SYMBOL(__activate_traps);
+
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ extern char vectors[]; /* kernel exception vectors */
+
+ ___deactivate_traps(vcpu);
+
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL2/EL0 translation regime used by
+ * the host.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
+ write_sysreg(vectors, vbar_el1);
+}
+NOKPROBE_SYMBOL(__deactivate_traps);
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ u64 exit_code;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ /*
+ * ARM erratum 1165522 requires us to configure both stage 1 and
+ * stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE.
+ *
+ * We have already configured the guest's stage 1 translation in
+ * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm
+ * before __activate_traps, because __activate_vm configures
+ * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
+ * (among other things).
+ */
+ __activate_vm(vcpu->arch.hw_mmu);
+ __activate_traps(vcpu);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ __set_guest_arch_workaround_state(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ __set_host_arch_workaround_state(vcpu);
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+ __fpsimd_save_fpexc32(vcpu);
+
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_daif_mask();
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
+ */
+ pmr_sync();
+
+ ret = __kvm_vcpu_run_vhe(vcpu);
+
+ /*
+ * local_daif_restore() takes care to properly restore PSTATE.DAIF
+ * and the GIC PMR if the host is using IRQ priorities.
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
+
+ return ret;
+}
+
+static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ panic(__hyp_panic_string,
+ spsr, elr,
+ read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
+ read_sysreg(hpfar_el2), par, vcpu);
+}
+NOKPROBE_SYMBOL(__hyp_call_panic);
+
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+ u64 spsr = read_sysreg_el2(SYS_SPSR);
+ u64 elr = read_sysreg_el2(SYS_ELR);
+ u64 par = read_sysreg(par_el1);
+
+ __hyp_call_panic(spsr, elr, par, host_ctxt);
+ unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
new file mode 100644
index 000000000000..996471e4c138
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <hyp/sysreg-sr.h>
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
+ */
+
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_host_state_vhe);
+
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
+
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe);
+
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
+NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
+
+/**
+ * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_cpu_context *host_ctxt;
+
+ host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c
new file mode 100644
index 000000000000..4cda674a8be6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <asm/kvm_hyp.h>
+
+void __kvm_timer_set_cntvoff(u64 cntvoff)
+{
+ write_sysreg(cntvoff, cntvoff_el2);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
new file mode 100644
index 000000000000..fd7895945bbc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#include <linux/irqflags.h>
+
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/tlbflush.h>
+
+struct tlb_inv_context {
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
+};
+
+static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
+{
+ u64 val;
+
+ local_irq_save(cxt->flags);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /*
+ * For CPUs that are affected by ARM errata 1165522 or 1530923,
+ * we cannot trust stage-1 to be in a correct state at that
+ * point. Since we do not want to force a full load of the
+ * vcpu state, we prevent the EL1 page-table walker to
+ * allocate new TLBs. This is done by setting the EPD bits
+ * in the TCR_EL1 register. We also need to prevent it to
+ * allocate IPA->PA walks, so we enable the S1 MMU...
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ }
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ *
+ * ARM erratum 1165522 requires some special handling (again),
+ * as we need to make sure both stages of translation are in
+ * place before clearing TGE. __load_guest_stage2() already
+ * has an ISB in order to deal with this.
+ */
+ __load_guest_stage2(mmu);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ isb();
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+ /* Restore the registers to what they were */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ }
+
+ local_irq_restore(cxt->flags);
+}
+
+void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa, int level)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ ipa >>= 12;
+ __tlbi_level(ipas2e1is, ipa, level);
+
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalls12e1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+{
+ struct tlb_inv_context cxt;
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
+void __kvm_flush_vm_context(void)
+{
+ dsb(ishst);
+ __tlbi(alle1is);
+
+ /*
+ * VIPT and PIPT caches are not affected by VMID, so no maintenance
+ * is necessary across a VMID rollover.
+ *
+ * VPIPT caches constrain lookup and maintenance to the active VMID,
+ * so we need to invalidate lines with a stale VMID to avoid an ABA
+ * race after multiple rollovers.
+ *
+ */
+ if (icache_is_vpipt())
+ asm volatile("ic ialluis");
+
+ dsb(ish);
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index e21fdd93027a..ebfdfc27b2bd 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -64,7 +64,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
case PSR_MODE_EL1h:
vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+ vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
default:
/* Don't do that */
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 158fbe682611..6a2826f1bf5e 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -146,12 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
return -ENOSYS;
}
- /* Page table accesses IO mem: tell guest to fix its TTBR */
- if (kvm_vcpu_dabt_iss1tw(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
/*
* Prepare MMIO operation. First decode the syndrome data we get
* from the CPU. Then try if some in-kernel emulation feels
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7a7ddc4558a7..0121ef2c7c8d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level);
}
/*
@@ -90,74 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn)
/**
* stage2_dissolve_pmd() - clear and flush huge PMD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pmd: pmd pointer for IPA
*
* Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd)
{
if (!pmd_thp_or_huge(*pmd))
return;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
put_page(virt_to_page(pmd));
}
/**
* stage2_dissolve_pud() - clear and flush huge PUD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pud: pud pointer for IPA
*
* Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
+static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp)
{
+ struct kvm *kvm = mmu->kvm;
+
if (!stage2_pud_huge(kvm, *pudp))
return;
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
put_page(virt_to_page(pudp));
}
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
stage2_pgd_clear(kvm, pgd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_p4d_free(kvm, p4d_table);
put_page(virt_to_page(pgd));
}
-static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr)
+static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0);
stage2_p4d_clear(kvm, p4d);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pud_free(kvm, pud_table);
put_page(virt_to_page(p4d));
}
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
+
VM_BUG_ON(stage2_pud_huge(kvm, *pud));
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pmd_free(kvm, pmd_table);
put_page(virt_to_page(pud));
}
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
VM_BUG_ON(pmd_thp_or_huge(*pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
free_page((unsigned long)pte_table);
put_page(virt_to_page(pmd));
}
@@ -223,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
* we then fully enforce cacheability of RAM, no matter what the guest
* does.
*/
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t start_addr = addr;
@@ -235,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
pte_t old_pte = *pte;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
/* No need to invalidate the cache for device mappings */
if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -245,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (stage2_pte_table_empty(kvm, start_pte))
- clear_stage2_pmd_entry(kvm, pmd, start_addr);
+ if (stage2_pte_table_empty(mmu->kvm, start_pte))
+ clear_stage2_pmd_entry(mmu, pmd, start_addr);
}
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pmd_t *pmd, *start_pmd;
@@ -263,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
pmd_t old_pmd = *pmd;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
kvm_flush_dcache_pmd(old_pmd);
put_page(virt_to_page(pmd));
} else {
- unmap_stage2_ptes(kvm, pmd, addr, next);
+ unmap_stage2_ptes(mmu, pmd, addr, next);
}
}
} while (pmd++, addr = next, addr != end);
if (stage2_pmd_table_empty(kvm, start_pmd))
- clear_stage2_pud_entry(kvm, pud, start_addr);
+ clear_stage2_pud_entry(mmu, pud, start_addr);
}
-static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
+static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pud_t *pud, *start_pud;
@@ -292,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
pud_t old_pud = *pud;
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
kvm_flush_dcache_pud(old_pud);
put_page(virt_to_page(pud));
} else {
- unmap_stage2_pmds(kvm, pud, addr, next);
+ unmap_stage2_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
if (stage2_pud_table_empty(kvm, start_pud))
- clear_stage2_p4d_entry(kvm, p4d, start_addr);
+ clear_stage2_p4d_entry(mmu, p4d, start_addr);
}
-static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
p4d_t *p4d, *start_p4d;
@@ -315,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- unmap_stage2_puds(kvm, p4d, addr, next);
+ unmap_stage2_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
if (stage2_p4d_table_empty(kvm, start_p4d))
- clear_stage2_pgd_entry(kvm, pgd, start_addr);
+ clear_stage2_pgd_entry(mmu, pgd, start_addr);
}
/**
@@ -333,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
@@ -342,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
assert_spin_locked(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Make sure the page table is still active, as another thread
* could have possibly freed the page table, while we released
* the lock.
*/
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- unmap_stage2_p4ds(kvm, pgd, addr, next);
+ unmap_stage2_p4ds(mmu, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
@@ -363,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
} while (pgd++, addr = next, addr != end);
}
-static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
pte_t *pte;
@@ -375,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -388,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
if (pmd_thp_or_huge(*pmd))
kvm_flush_dcache_pmd(*pmd);
else
- stage2_flush_ptes(kvm, pmd, addr, next);
+ stage2_flush_ptes(mmu, pmd, addr, next);
}
} while (pmd++, addr = next, addr != end);
}
-static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -406,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
if (stage2_pud_huge(kvm, *pud))
kvm_flush_dcache_pud(*pud);
else
- stage2_flush_pmds(kvm, pud, addr, next);
+ stage2_flush_pmds(mmu, pud, addr, next);
}
} while (pud++, addr = next, addr != end);
}
-static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -421,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_flush_puds(kvm, p4d, addr, next);
+ stage2_flush_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
static void stage2_flush_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- stage2_flush_p4ds(kvm, pgd, addr, next);
+ stage2_flush_p4ds(mmu, pgd, addr, next);
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
@@ -964,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
}
/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm: The KVM struct pointer for the VM.
+ * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
+ * @kvm: The pointer to the KVM structure
+ * @mmu: The pointer to the s2 MMU structure
*
* Allocates only the stage-2 HW PGD level table(s) of size defined by
- * stage2_pgd_size(kvm).
+ * stage2_pgd_size(mmu->kvm).
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
phys_addr_t pgd_phys;
pgd_t *pgd;
+ int cpu;
- if (kvm->arch.pgd != NULL) {
+ if (mmu->pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
@@ -992,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
return -EINVAL;
- kvm->arch.pgd = pgd;
- kvm->arch.pgd_phys = pgd_phys;
+ mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
+ if (!mmu->last_vcpu_ran) {
+ free_pages_exact(pgd, stage2_pgd_size(kvm));
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
+
+ mmu->kvm = kvm;
+ mmu->pgd = pgd;
+ mmu->pgd_phys = pgd_phys;
+ mmu->vmid.vmid_gen = 0;
+
return 0;
}
@@ -1032,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
- unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
}
hva = vm_end;
} while (hva < reg_end);
@@ -1064,39 +1093,34 @@ void stage2_unmap_vm(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
}
-/**
- * kvm_free_stage2_pgd - free all stage-2 tables
- * @kvm: The KVM struct pointer for the VM.
- *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
- * underlying level-2 and level-3 tables before freeing the actual level-1 table
- * and setting the struct pointer to NULL.
- */
-void kvm_free_stage2_pgd(struct kvm *kvm)
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
+ struct kvm *kvm = mmu->kvm;
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
- if (kvm->arch.pgd) {
- unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
- pgd = READ_ONCE(kvm->arch.pgd);
- kvm->arch.pgd = NULL;
- kvm->arch.pgd_phys = 0;
+ if (mmu->pgd) {
+ unmap_stage2_range(mmu, 0, kvm_phys_size(kvm));
+ pgd = READ_ONCE(mmu->pgd);
+ mmu->pgd = NULL;
}
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */
- if (pgd)
+ if (pgd) {
free_pages_exact(pgd, stage2_pgd_size(kvm));
+ free_percpu(mmu->last_vcpu_ran);
+ }
}
-static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
p4d_t *p4d;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
@@ -1108,13 +1132,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_p4d_offset(kvm, pgd, addr);
}
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
pud_t *pud;
- p4d = stage2_get_p4d(kvm, cache, addr);
+ p4d = stage2_get_p4d(mmu, cache, addr);
if (stage2_p4d_none(kvm, *p4d)) {
if (!cache)
return NULL;
@@ -1126,13 +1151,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pud_offset(kvm, p4d, addr);
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud || stage2_pud_huge(kvm, *pud))
return NULL;
@@ -1147,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pmd_offset(kvm, pud, addr);
}
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
- *cache, phys_addr_t addr, const pmd_t *new_pmd)
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pmd_t *new_pmd)
{
pmd_t *pmd, old_pmd;
retry:
- pmd = stage2_get_pmd(kvm, cache, addr);
+ pmd = stage2_get_pmd(mmu, cache, addr);
VM_BUG_ON(!pmd);
old_pmd = *pmd;
@@ -1186,7 +1213,7 @@ retry:
* get handled accordingly.
*/
if (!pmd_thp_or_huge(old_pmd)) {
- unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE);
goto retry;
}
/*
@@ -1202,7 +1229,7 @@ retry:
*/
WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
} else {
get_page(virt_to_page(pmd));
}
@@ -1211,13 +1238,15 @@ retry:
return 0;
}
-static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pud_t *new_pudp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp, old_pud;
retry:
- pudp = stage2_get_pud(kvm, cache, addr);
+ pudp = stage2_get_pud(mmu, cache, addr);
VM_BUG_ON(!pudp);
old_pud = *pudp;
@@ -1236,13 +1265,13 @@ retry:
* the range for this block and retry.
*/
if (!stage2_pud_huge(kvm, old_pud)) {
- unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE);
goto retry;
}
WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
} else {
get_page(virt_to_page(pudp));
}
@@ -1257,9 +1286,10 @@ retry:
* leaf-entry is returned in the appropriate level variable - pudpp,
* pmdpp, ptepp.
*/
-static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
+static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr,
pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -1268,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
*pmdpp = NULL;
*ptepp = NULL;
- pudp = stage2_get_pud(kvm, NULL, addr);
+ pudp = stage2_get_pud(mmu, NULL, addr);
if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
return false;
@@ -1294,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
return true;
}
-static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
+static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz)
{
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
bool found;
- found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
+ found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep);
if (!found)
return false;
@@ -1313,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
return sz == PAGE_SIZE && kvm_s2pte_exec(ptep);
}
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pte_t *new_pte,
unsigned long flags)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, old_pte;
@@ -1326,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
VM_BUG_ON(logging_active && !cache);
/* Create stage-2 page table mapping - Levels 0 and 1 */
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud) {
/*
* Ignore calls from kvm_set_spte_hva for unallocated
@@ -1340,7 +1372,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* on to allocate page.
*/
if (logging_active)
- stage2_dissolve_pud(kvm, addr, pud);
+ stage2_dissolve_pud(mmu, addr, pud);
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
@@ -1364,7 +1396,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* allocate page.
*/
if (logging_active)
- stage2_dissolve_pmd(kvm, addr, pmd);
+ stage2_dissolve_pmd(mmu, addr, pmd);
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
@@ -1388,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
} else {
get_page(virt_to_page(pte));
}
@@ -1453,8 +1485,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, &cache, addr, &pte,
- KVM_S2PTE_FLAG_IS_IOMAP);
+ ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
+ KVM_S2PTE_FLAG_IS_IOMAP);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -1493,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -1516,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
/**
* stage2_wp_puds - write protect P4D range
- * @pgd: pointer to pgd entry
+ * @p4d: pointer to p4d entry
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -1534,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
if (!kvm_s2pud_readonly(pud))
kvm_set_s2pud_readonly(pud);
} else {
- stage2_wp_pmds(kvm, pud, addr, next);
+ stage2_wp_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
@@ -1546,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -1556,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_wp_puds(kvm, p4d, addr, next);
+ stage2_wp_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
@@ -1566,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
* @addr: Start address of range
* @end: End address of range
*/
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1583,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
* the lock.
*/
cond_resched_lock(&kvm->mmu_lock);
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (stage2_pgd_present(kvm, *pgd))
- stage2_wp_p4ds(kvm, pgd, addr, next);
+ stage2_wp_p4ds(mmu, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -1617,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
@@ -1641,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
}
/*
@@ -1804,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pgprot_t mem_type = PAGE_S2;
bool logging_active = memslot_is_logging(memslot);
unsigned long vma_pagesize, flags = 0;
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
@@ -1925,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
needs_exec = exec_fault ||
(fault_status == FSC_PERM &&
- stage2_is_exec(kvm, fault_ipa, vma_pagesize));
+ stage2_is_exec(mmu, fault_ipa, vma_pagesize));
if (vma_pagesize == PUD_SIZE) {
pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
@@ -1937,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pud = kvm_s2pud_mkexec(new_pud);
- ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
+ ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud);
} else if (vma_pagesize == PMD_SIZE) {
pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
@@ -1949,7 +1986,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pmd = kvm_s2pmd_mkexec(new_pmd);
- ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
@@ -1961,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pte = kvm_s2pte_mkexec(new_pte);
- ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+ ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
}
out_unlock:
@@ -1990,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
spin_lock(&vcpu->kvm->mmu_lock);
- if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte))
goto out;
if (pud) { /* HugeTLB */
@@ -2040,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
/* Synchronous External Abort? */
- if (kvm_vcpu_dabt_isextabt(vcpu)) {
+ if (kvm_vcpu_abt_issea(vcpu)) {
/*
* For RAS the host kernel may handle this abort.
* There is no need to pass the error into the guest.
*/
- if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
- return 1;
-
- if (unlikely(!is_iabt)) {
+ if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
kvm_inject_vabt(vcpu);
- return 1;
- }
+
+ return 1;
}
- trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+ trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
@@ -2063,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
- (unsigned long)kvm_vcpu_get_hsr(vcpu));
+ (unsigned long)kvm_vcpu_get_esr(vcpu));
return -EFAULT;
}
@@ -2074,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
write_fault = kvm_is_write_fault(vcpu);
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
+ /*
+ * The guest has put either its instructions or its page-tables
+ * somewhere it shouldn't have. Userspace won't be able to do
+ * anything about this (there's no syndrome for a start), so
+ * re-inject the abort back into the guest.
+ */
if (is_iabt) {
- /* Prefetch Abort on I/O address */
ret = -ENOEXEC;
goto out;
}
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ goto out_unlock;
+ }
+
/*
* Check for a cache maintenance operation. Since we
* ended-up here, we know it is outside of any memory
@@ -2090,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* So let's assume that the guest is just being
* cautious, and skip the instruction.
*/
- if (kvm_vcpu_dabt_is_cm(vcpu)) {
+ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
ret = 1;
goto out_unlock;
@@ -2163,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_unmap_hva_range(start, end);
@@ -2190,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
* therefore stage2_set_pte() never needs to clear out a huge PMD
* through this calling path.
*/
- stage2_set_pte(kvm, NULL, gpa, pte, 0);
+ stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
return 0;
}
@@ -2201,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
kvm_pfn_t pfn = pte_pfn(pte);
pte_t stage2_pte;
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_set_spte_hva(hva);
@@ -2224,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2242,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2255,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_age_hva(start, end);
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
@@ -2263,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_test_age_hva(hva);
return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
@@ -2476,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
else
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -2495,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_free_stage2_pgd(kvm);
+ kvm_free_stage2_pgd(&kvm->arch.mmu);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -2505,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
spin_unlock(&kvm->mmu_lock);
}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index a900181e3867..accc1d5fba61 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -100,7 +100,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
*/
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
{
- unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs;
unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
switch (mode) {
@@ -147,8 +147,20 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
{
int spsr_idx = vcpu_spsr32_mode(vcpu);
- if (!vcpu->arch.sysregs_loaded_on_cpu)
- return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return __vcpu_sys_reg(vcpu, SPSR_EL1);
+ case KVM_SPSR_ABT:
+ return vcpu->arch.ctxt.spsr_abt;
+ case KVM_SPSR_UND:
+ return vcpu->arch.ctxt.spsr_und;
+ case KVM_SPSR_IRQ:
+ return vcpu->arch.ctxt.spsr_irq;
+ case KVM_SPSR_FIQ:
+ return vcpu->arch.ctxt.spsr_fiq;
+ }
+ }
switch (spsr_idx) {
case KVM_SPSR_SVC:
@@ -171,7 +183,24 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
int spsr_idx = vcpu_spsr32_mode(vcpu);
if (!vcpu->arch.sysregs_loaded_on_cpu) {
- vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
+ break;
+ case KVM_SPSR_ABT:
+ vcpu->arch.ctxt.spsr_abt = v;
+ break;
+ case KVM_SPSR_UND:
+ vcpu->arch.ctxt.spsr_und = v;
+ break;
+ case KVM_SPSR_IRQ:
+ vcpu->arch.ctxt.spsr_irq = v;
+ break;
+ case KVM_SPSR_FIQ:
+ vcpu->arch.ctxt.spsr_fiq = v;
+ break;
+ }
+
return;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 6ed36be51b4b..ee33875c5c2a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -42,6 +42,11 @@ static u32 kvm_ipa_limit;
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
+static bool system_has_full_ptr_auth(void)
+{
+ return system_supports_address_auth() && system_supports_generic_auth();
+}
+
/**
* kvm_arch_vm_ioctl_check_extension
*
@@ -80,8 +85,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
- r = has_vhe() && system_supports_address_auth() &&
- system_supports_generic_auth();
+ r = system_has_full_ptr_auth();
break;
default:
r = 0;
@@ -205,19 +209,14 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
- /* Support ptrauth only if the system supports these capabilities. */
- if (!has_vhe())
- return -EINVAL;
-
- if (!system_supports_address_auth() ||
- !system_supports_generic_auth())
- return -EINVAL;
/*
* For now make sure that both address/generic pointer authentication
- * features are requested by the userspace together.
+ * features are requested by the userspace together and the system
+ * supports these capabilities.
*/
if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
+ !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
+ !system_has_full_ptr_auth())
return -EINVAL;
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
@@ -292,7 +291,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset core registers */
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
- vcpu_gp_regs(vcpu)->regs.pstate = pstate;
+ vcpu_gp_regs(vcpu)->pstate = pstate;
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 138961d7ebe3..077293b5115f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -94,6 +94,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
@@ -133,6 +134,7 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
@@ -242,6 +244,25 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
+
+ if (p->is_aarch32) {
+ if (r->Op2 & 2)
+ p->regval = upper_32_bits(p->regval);
+ else
+ p->regval = lower_32_bits(p->regval);
+ }
+
+ return true;
+}
+
/*
* Trap handler for the GICv3 SGI generation system register.
* Forward the request to the VGIC emulation.
@@ -615,6 +636,12 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
}
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 actlr = read_sysreg(actlr_el1);
+ vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1);
+}
+
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mpidr;
@@ -1518,6 +1545,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(7,7),
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
+ { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@ -1957,6 +1985,8 @@ static const struct sys_reg_desc cp14_64_regs[] = {
static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr },
{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
@@ -2109,36 +2139,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
return 0;
}
-/* Target specific emulation tables */
-static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
-
-void kvm_register_target_sys_reg_table(unsigned int target,
- struct kvm_sys_reg_target_table *table)
-{
- if (check_sysreg_table(table->table64.table, table->table64.num, false) ||
- check_sysreg_table(table->table32.table, table->table32.num, true))
- return;
-
- target_tables[target] = table;
-}
-
-/* Get specific register table for this target. */
-static const struct sys_reg_desc *get_target_table(unsigned target,
- bool mode_is_64,
- size_t *num)
-{
- struct kvm_sys_reg_target_table *table;
-
- table = target_tables[target];
- if (mode_is_64) {
- *num = table->table64.num;
- return table->table64.table;
- } else {
- *num = table->table32.num;
- return table->table32.table;
- }
-}
-
static int match_sys_reg(const void *key, const void *elt)
{
const unsigned long pval = (unsigned long)key;
@@ -2220,10 +2220,10 @@ static int emulate_cp(struct kvm_vcpu *vcpu,
static void unhandled_cp_access(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
- u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
int cp = -1;
- switch(hsr_ec) {
+ switch (esr_ec) {
case ESR_ELx_EC_CP15_32:
case ESR_ELx_EC_CP15_64:
cp = 15;
@@ -2249,22 +2249,20 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
*/
static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *global,
- size_t nr_global,
- const struct sys_reg_desc *target_specific,
- size_t nr_specific)
+ size_t nr_global)
{
struct sys_reg_params params;
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- int Rt2 = (hsr >> 10) & 0x1f;
+ int Rt2 = (esr >> 10) & 0x1f;
params.is_aarch32 = true;
params.is_32bit = false;
- params.CRm = (hsr >> 1) & 0xf;
- params.is_write = ((hsr & 1) == 0);
+ params.CRm = (esr >> 1) & 0xf;
+ params.is_write = ((esr & 1) == 0);
params.Op0 = 0;
- params.Op1 = (hsr >> 16) & 0xf;
+ params.Op1 = (esr >> 16) & 0xf;
params.Op2 = 0;
params.CRn = 0;
@@ -2278,14 +2276,11 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
}
/*
- * Try to emulate the coprocessor access using the target
- * specific table first, and using the global table afterwards.
- * If either of the tables contains a handler, handle the
+ * If the table contains a handler, handle the
* potential register operation in the case of a read and return
* with success.
*/
- if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
- !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!emulate_cp(vcpu, &params, global, nr_global)) {
/* Split up the value between registers for the read side */
if (!params.is_write) {
vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
@@ -2306,26 +2301,23 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
*/
static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *global,
- size_t nr_global,
- const struct sys_reg_desc *target_specific,
- size_t nr_specific)
+ size_t nr_global)
{
struct sys_reg_params params;
- u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ u32 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
params.is_aarch32 = true;
params.is_32bit = true;
- params.CRm = (hsr >> 1) & 0xf;
+ params.CRm = (esr >> 1) & 0xf;
params.regval = vcpu_get_reg(vcpu, Rt);
- params.is_write = ((hsr & 1) == 0);
- params.CRn = (hsr >> 10) & 0xf;
+ params.is_write = ((esr & 1) == 0);
+ params.CRn = (esr >> 10) & 0xf;
params.Op0 = 0;
- params.Op1 = (hsr >> 14) & 0x7;
- params.Op2 = (hsr >> 17) & 0x7;
+ params.Op1 = (esr >> 14) & 0x7;
+ params.Op2 = (esr >> 17) & 0x7;
- if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
- !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!emulate_cp(vcpu, &params, global, nr_global)) {
if (!params.is_write)
vcpu_set_reg(vcpu, Rt, params.regval);
return 1;
@@ -2337,38 +2329,22 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
{
- const struct sys_reg_desc *target_specific;
- size_t num;
-
- target_specific = get_target_table(vcpu->arch.target, false, &num);
- return kvm_handle_cp_64(vcpu,
- cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
- target_specific, num);
+ return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs));
}
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
- const struct sys_reg_desc *target_specific;
- size_t num;
-
- target_specific = get_target_table(vcpu->arch.target, false, &num);
- return kvm_handle_cp_32(vcpu,
- cp15_regs, ARRAY_SIZE(cp15_regs),
- target_specific, num);
+ return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
}
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_64(vcpu,
- cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
- NULL, 0);
+ return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs));
}
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu,
- cp14_regs, ARRAY_SIZE(cp14_regs),
- NULL, 0);
+ return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
}
static bool is_imp_def_sys_reg(struct sys_reg_params *params)
@@ -2380,15 +2356,9 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
- size_t num;
- const struct sys_reg_desc *table, *r;
+ const struct sys_reg_desc *r;
- table = get_target_table(vcpu->arch.target, true, &num);
-
- /* Search target-specific then generic table. */
- r = find_reg(params, table, num);
- if (!r)
- r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+ r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
if (likely(r)) {
perform_access(vcpu, params, r);
@@ -2403,14 +2373,20 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
return 1;
}
-static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *table, size_t num)
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
unsigned long i;
- for (i = 0; i < num; i++)
- if (table[i].reset)
- table[i].reset(vcpu, &table[i]);
+ for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++)
+ if (sys_reg_descs[i].reset)
+ sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]);
}
/**
@@ -2420,7 +2396,7 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
{
struct sys_reg_params params;
- unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
int ret;
@@ -2491,8 +2467,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
u64 id)
{
- size_t num;
- const struct sys_reg_desc *table, *r;
+ const struct sys_reg_desc *r;
struct sys_reg_params params;
/* We only do sys_reg for now. */
@@ -2502,10 +2477,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if (!index_to_params(id, &params))
return NULL;
- table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg(&params, table, num);
- if (!r)
- r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+ r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
/* Not saved in the sys_reg array and not otherwise accessible? */
if (r && !(r->reg || r->get_user))
@@ -2805,35 +2777,17 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
/* Assumed ordered tables, see kvm_sys_reg_table_init. */
static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
{
- const struct sys_reg_desc *i1, *i2, *end1, *end2;
+ const struct sys_reg_desc *i2, *end2;
unsigned int total = 0;
- size_t num;
int err;
- /* We check for duplicates here, to allow arch-specific overrides. */
- i1 = get_target_table(vcpu->arch.target, true, &num);
- end1 = i1 + num;
i2 = sys_reg_descs;
end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
- BUG_ON(i1 == end1 || i2 == end2);
-
- /* Walk carefully, as both tables may refer to the same register. */
- while (i1 || i2) {
- int cmp = cmp_sys_reg(i1, i2);
- /* target-specific overrides generic entry. */
- if (cmp <= 0)
- err = walk_one_sys_reg(vcpu, i1, &uind, &total);
- else
- err = walk_one_sys_reg(vcpu, i2, &uind, &total);
-
+ while (i2 != end2) {
+ err = walk_one_sys_reg(vcpu, i2++, &uind, &total);
if (err)
return err;
-
- if (cmp <= 0 && ++i1 == end1)
- i1 = NULL;
- if (cmp >= 0 && ++i2 == end2)
- i2 = NULL;
}
return total;
}
@@ -2900,22 +2854,3 @@ void kvm_sys_reg_table_init(void)
/* Clear all higher bits. */
cache_levels &= (1 << (i*3))-1;
}
-
-/**
- * kvm_reset_sys_regs - sets system registers to reset value
- * @vcpu: The VCPU pointer
- *
- * This function finds the right table above and sets the registers on the
- * virtual CPU struct to their architecturally defined reset values.
- */
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
-{
- size_t num;
- const struct sys_reg_desc *table;
-
- /* Generic chip reset first (so target could override). */
- reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
-
- table = get_target_table(vcpu->arch.target, true, &num);
- reset_sys_reg_descs(vcpu, table, num);
-}
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
deleted file mode 100644
index aa9d356451eb..000000000000
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Based on arch/arm/kvm/coproc_a15.c:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Authors: Rusty Russell <rusty@rustcorp.au>
- * Christoffer Dall <c.dall@virtualopensystems.com>
- */
-#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
-#include <asm/sysreg.h>
-#include <linux/init.h>
-
-#include "sys_regs.h"
-
-static bool access_actlr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
-
- if (p->is_aarch32) {
- if (r->Op2 & 2)
- p->regval = upper_32_bits(p->regval);
- else
- p->regval = lower_32_bits(p->regval);
- }
-
- return true;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
-{
- __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
-}
-
-/*
- * Implementation specific sys-reg registers.
- * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
- */
-static const struct sys_reg_desc genericv8_sys_regs[] = {
- { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
-};
-
-static const struct sys_reg_desc genericv8_cp15_regs[] = {
- /* ACTLR */
- { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
- access_actlr },
- { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011),
- access_actlr },
-};
-
-static struct kvm_sys_reg_target_table genericv8_target_table = {
- .table64 = {
- .table = genericv8_sys_regs,
- .num = ARRAY_SIZE(genericv8_sys_regs),
- },
- .table32 = {
- .table = genericv8_cp15_regs,
- .num = ARRAY_SIZE(genericv8_cp15_regs),
- },
-};
-
-static int __init sys_reg_genericv8_init(void)
-{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
- BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
- &genericv8_sys_regs[i]) >= 0);
-
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
- &genericv8_target_table);
- kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8,
- &genericv8_target_table);
-
- return 0;
-}
-late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 4c71270cc097..4691053c5ee4 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -301,8 +301,8 @@ TRACE_EVENT(kvm_timer_save_state,
),
TP_fast_assign(
- __entry->ctl = ctx->cnt_ctl;
- __entry->cval = ctx->cnt_cval;
+ __entry->ctl = timer_get_ctl(ctx);
+ __entry->cval = timer_get_cval(ctx);
__entry->timer_idx = arch_timer_ctx_index(ctx);
),
@@ -323,8 +323,8 @@ TRACE_EVENT(kvm_timer_restore_state,
),
TP_fast_assign(
- __entry->ctl = ctx->cnt_ctl;
- __entry->cval = ctx->cnt_cval;
+ __entry->ctl = timer_get_ctl(ctx);
+ __entry->cval = timer_get_cval(ctx);
__entry->timer_idx = arch_timer_ctx_index(ctx);
),
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index a4f48c1ac28c..e0404bcab019 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -48,7 +48,7 @@ __init void kvm_compute_layout(void)
va_mask = GENMASK_ULL(tag_lsb - 1, 0);
tag_val = hyp_va_msb;
- if (tag_lsb != (vabits_actual - 1)) {
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) {
/* We have some free bits to insert a random tag. */
tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
}
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index d8cdfea5cc96..79f8899b234c 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -100,19 +100,33 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
/**
* kvm_arch_set_irq_inatomic: fast-path for irqfd injection
- *
- * Currently only direct MSI injection is supported.
*/
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) {
+ if (!level)
+ return -EWOULDBLOCK;
+
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_MSI: {
struct kvm_msi msi;
+ if (!vgic_has_its(kvm))
+ break;
+
kvm_populate_msi(e, &msi);
- if (!vgic_its_inject_cached_translation(kvm, &msi))
- return 0;
+ return vgic_its_inject_cached_translation(kvm, &msi);
+ }
+
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ /*
+ * Injecting SPIs is always possible in atomic context
+ * as long as the damn vgic is initialized.
+ */
+ if (unlikely(!vgic_initialized(kvm)))
+ break;
+ return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status);
}
return -EWOULDBLOCK;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index c012a52b19f5..40cbaca81333 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -757,9 +757,8 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
db = (u64)msi->address_hi << 32 | msi->address_lo;
irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
-
if (!irq)
- return -1;
+ return -EWOULDBLOCK;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index d2339a2b9fb9..5c786b915cd3 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -389,7 +389,7 @@ u64 vgic_sanitise_outer_cacheability(u64 field)
case GIC_BASER_CACHE_nC:
return field;
default:
- return GIC_BASER_CACHE_nC;
+ return GIC_BASER_CACHE_SameAsInner;
}
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 8afb238ff335..f07333e86c2f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags)
+ unsigned int mm_flags, unsigned long vm_flags,
+ struct pt_regs *regs)
{
struct vm_area_struct *vma = find_vma(mm, addr);
@@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
*/
if (!(vma->vm_flags & vm_flags))
return VM_FAULT_BADACCESS;
- return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
+ return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
const struct fault_info *inf;
struct mm_struct *mm = current->mm;
- vm_fault_t fault, major = 0;
+ vm_fault_t fault;
unsigned long vm_flags = VM_ACCESS_FLAGS;
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
@@ -516,8 +517,7 @@ retry:
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
- major |= fault & VM_FAULT_MAJOR;
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -538,25 +538,8 @@ retry:
* Handle the "normal" (no error) case first.
*/
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS)))) {
- /*
- * Major/minor page fault accounting is only done
- * once. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at
- * that point.
- */
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
- addr);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
- addr);
- }
-
+ VM_FAULT_BADACCESS))))
return 0;
- }
/*
* If we are in kernel mode at this point, we have no context to
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index aafcee3e3f7e..73f8b49d485c 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -461,13 +461,3 @@ void __init arm64_numa_init(void)
numa_init(dummy_numa_init);
}
-
-/*
- * We hope that we will be hotplugging memory on nodes we already know about,
- * such that acpi_get_node() succeeds and we never fall back to this...
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
- pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr);
- return 0;
-}
diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h
index db2640d5f575..79ede9b1a646 100644
--- a/arch/csky/include/asm/segment.h
+++ b/arch/csky/include/asm/segment.h
@@ -13,6 +13,6 @@ typedef struct {
#define USER_DS ((mm_segment_t) { 0x80000000UL })
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#endif /* __ASM_CSKY_SEGMENT_H */
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index b1dce9f2f04d..081b178b41b1 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -150,7 +150,8 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0,
+ regs);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -160,16 +161,6 @@ good_area:
goto bad_area;
BUG();
}
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
- address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
- address);
- }
-
mmap_read_unlock(mm);
return;
diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h
index a407978f9f9f..37950725d9b9 100644
--- a/arch/h8300/include/asm/segment.h
+++ b/arch/h8300/include/asm/segment.h
@@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void)
return USER_DS;
}
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#endif /* __ASSEMBLY__ */
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index cd3808f96b93..ef32c5a84ff3 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -18,6 +18,7 @@
#include <linux/signal.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
+#include <linux/perf_event.h>
/*
* Decode of hardware exception sends us to one of several
@@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -88,7 +91,7 @@ good_area:
break;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -96,10 +99,6 @@ good_area:
/* The most common case -- we are done. */
if (likely(!(fault & VM_FAULT_ERROR))) {
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 8aa473a4b0f4..179243c3dfc7 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -50,7 +50,7 @@
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
/*
* When accessing user memory, we need to make sure the entire area really is in
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index ced9c83e47c9..f52a41f4c340 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -135,7 +135,7 @@
123 common writev sys_writev
124 common pread64 sys_pread64
125 common pwrite64 sys_pwrite64
-126 common _sysctl sys_sysctl
+126 common _sysctl sys_ni_syscall
127 common mmap sys_mmap
128 common munmap sys_munmap
129 common mlock sys_mlock
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3a4dec334cc5..cd9766d2b6e0 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -14,6 +14,7 @@
#include <linux/kdebug.h>
#include <linux/prefetch.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/processor.h>
#include <asm/exception.h>
@@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
flags |= FAULT_FLAG_USER;
if (mask & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -143,7 +146,7 @@ retry:
* sure we exit gracefully rather than endlessly redo the
* fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -166,10 +169,6 @@ retry:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 5e1015eb6d0d..f34964271101 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr)
return 0;
return nid;
}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#endif
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 6663f1741798..6f2f38d05772 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -16,6 +16,7 @@ config M68K
select HAVE_DEBUG_BUGVERBOSE
select GENERIC_IRQ_SHOW
select GENERIC_ATOMIC64
+ select NO_DMA if !MMU && !COLDFIRE
select HAVE_UID16
select VIRT_TO_BUS
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
@@ -59,9 +60,6 @@ config TIME_LOW_RES
config NO_IOPORT_MAP
def_bool y
-config NO_DMA
- def_bool (MMU && SUN3) || (!MMU && !COLDFIRE)
-
config ZONE_DMA
bool
default y
diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index a82651d58af4..17e8c3a292d7 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -126,6 +126,7 @@ config SUN3
depends on MMU
depends on !MMU_MOTOROLA
select MMU_SUN3 if MMU
+ select NO_DMA
select M68020
help
This option enables support for the Sun 3 series of workstations
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
index c6686559e9b7..2b5e68a71ef7 100644
--- a/arch/m68k/include/asm/segment.h
+++ b/arch/m68k/include/asm/segment.h
@@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
#endif
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#endif /* __ASSEMBLY__ */
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index 191e75a6bb24..5337bc2c262f 100644
--- a/arch/m68k/include/asm/tlbflush.h
+++ b/arch/m68k/include/asm/tlbflush.h
@@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
if (vma->vm_mm == current->active_mm) {
- mm_segment_t old_fs = get_fs();
- set_fs(USER_DS);
+ mm_segment_t old_fs = force_uaccess_begin();
+
__flush_tlb_one(addr);
- set_fs(old_fs);
+ force_uaccess_end(old_fs);
}
}
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 1a4822de7292..81fc799d8392 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 508abb63da67..795f483b1050 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/setup.h>
#include <asm/traps.h>
@@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
@@ -134,7 +137,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
pr_debug("handle_mm_fault returns %x\n", fault);
if (fault_signal_pending(fault, regs))
@@ -150,16 +153,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 6723c56ec378..304b04ffea2f 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -41,7 +41,7 @@
# define get_fs() (current_thread_info()->addr_limit)
# define set_fs(val) (current_thread_info()->addr_limit = (val))
-# define segment_eq(a, b) ((a).seg == (b).seg)
+# define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#ifndef CONFIG_MMU
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index a3f4be8e7238..b4e263916f41 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index a2bfe587b491..b3fed2cecf84 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -28,6 +28,7 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
+#include <linux/perf_event.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -214,7 +217,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -230,10 +233,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (unlikely(fault & VM_FAULT_MAJOR))
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
index 6b471cdb16cf..e924c817f73d 100644
--- a/arch/mips/configs/cu1000-neo_defconfig
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 62b298c50905..61fc01f177a6 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -72,7 +72,7 @@ extern u64 __ua_limit;
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
/*
* eva_kernel_access() - determine whether kernel memory access on an EVA system
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index c63ddcaea54c..2203e2d0ae2a 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -167,7 +167,8 @@ static __always_inline u64 read_gic_count(const struct vdso_data *data)
#endif
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
#ifdef CONFIG_CSRC_R4K
if (clock_mode == VDSO_CLOCKMODE_R4K)
@@ -175,7 +176,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
#endif
#ifdef CONFIG_CLKSRC_MIPS_GIC
if (clock_mode == VDSO_CLOCKMODE_GIC)
- return read_gic_count(get_vdso_data());
+ return read_gic_count(vd);
#endif
/*
* Core checks mode already. So this raced against a concurrent
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 6b4ee92e3aed..f9df9edb67a4 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -159,7 +159,7 @@
149 n32 munlockall sys_munlockall
150 n32 vhangup sys_vhangup
151 n32 pivot_root sys_pivot_root
-152 n32 _sysctl compat_sys_sysctl
+152 n32 _sysctl sys_ni_syscall
153 n32 prctl sys_prctl
154 n32 adjtimex sys_adjtimex_time32
155 n32 setrlimit compat_sys_setrlimit
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 391acbf425a0..557f9954a2b9 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -159,7 +159,7 @@
149 n64 munlockall sys_munlockall
150 n64 vhangup sys_vhangup
151 n64 pivot_root sys_pivot_root
-152 n64 _sysctl sys_sysctl
+152 n64 _sysctl sys_ni_syscall
153 n64 prctl sys_prctl
154 n64 adjtimex sys_adjtimex
155 n64 setrlimit sys_setrlimit
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 5727c5187508..195b43cf27c8 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -164,7 +164,7 @@
150 o32 unused150 sys_ni_syscall
151 o32 getsid sys_getsid
152 o32 fdatasync sys_fdatasync
-153 o32 _sysctl sys_sysctl compat_sys_sysctl
+153 o32 _sysctl sys_ni_syscall
154 o32 mlock sys_mlock
155 o32 munlock sys_munlock
156 o32 mlockall sys_mlockall
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 0adce604fa44..126a5f3f4e4c 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs,
* memory, so we need to "switch" the address limit to
* user space, so that address check can work properly.
*/
- seg = get_fs();
- set_fs(USER_DS);
+ seg = force_uaccess_begin();
switch (insn.spec3_format.func) {
case lhe_op:
if (!access_ok(addr, 2)) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigbus;
}
LoadHWE(addr, value, res);
if (res) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto fault;
}
compute_return_epc(regs);
@@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs,
break;
case lwe_op:
if (!access_ok(addr, 4)) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigbus;
}
LoadWE(addr, value, res);
if (res) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto fault;
}
compute_return_epc(regs);
@@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs,
break;
case lhue_op:
if (!access_ok(addr, 2)) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigbus;
}
LoadHWUE(addr, value, res);
if (res) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto fault;
}
compute_return_epc(regs);
@@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs,
break;
case she_op:
if (!access_ok(addr, 2)) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigbus;
}
compute_return_epc(regs);
value = regs->regs[insn.spec3_format.rt];
StoreHWE(addr, value, res);
if (res) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto fault;
}
break;
case swe_op:
if (!access_ok(addr, 4)) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigbus;
}
compute_return_epc(regs);
value = regs->regs[insn.spec3_format.rt];
StoreWE(addr, value, res);
if (res) {
- set_fs(seg);
+ force_uaccess_end(seg);
goto fault;
}
break;
default:
- set_fs(seg);
+ force_uaccess_end(seg);
goto sigill;
}
- set_fs(seg);
+ force_uaccess_end(seg);
}
#endif
break;
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 703782355318..d70c4f8e14e2 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1935,7 +1935,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
case lwu_op:
vcpu->mmio_needed = 1; /* unsigned */
- /* fall through */
+ fallthrough;
#endif
case lw_op:
run->mmio.len = 4;
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 3932f767e938..c299e5d6d69c 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -29,7 +29,9 @@
#include <linux/kvm_host.h>
#include "interrupt.h"
+#ifdef CONFIG_CPU_LOONGSON64
#include "loongson_regs.h"
+#endif
#include "trace.h"
@@ -1142,7 +1144,6 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
#ifdef CONFIG_CPU_LOONGSON64
static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst,
u32 *opc, u32 cause,
- struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
unsigned int rs, rd;
@@ -1240,7 +1241,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
#endif
#ifdef CONFIG_CPU_LOONGSON64
case lwc2_op:
- er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu);
+ er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu);
break;
#endif
case spec3_op:
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 01b168a90434..7c871b14e74a 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -152,12 +154,11 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -168,15 +169,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- tsk->maj_flt++;
- } else {
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- tsk->min_flt++;
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 3a9219f53ee0..010ba5f1d7dd 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs)
current_thread_info()->addr_limit = fs;
}
-#define segment_eq(a, b) ((a) == (b))
+#define uaccess_kernel() (get_fs() == KERNEL_DS)
#define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size))
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index e85bbbadc0e7..e01ad5d17224 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs)
regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]);
pr_info(" IRQs o%s Segment %s\n",
interrupts_enabled(regs) ? "n" : "ff",
- segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user");
+ uaccess_kernel() ? "kernel" : "user");
}
EXPORT_SYMBOL(show_regs);
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index c8b9061a2ee3..1eb7ded6992b 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs)
{
unsigned long inst;
int ret = -EFAULT;
- mm_segment_t seg = get_fs();
+ mm_segment_t seg;
inst = get_inst(regs->ipc);
@@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs)
"Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr,
regs->ipc, inst);
- set_fs(USER_DS);
-
+ seg = force_uaccess_begin();
if (inst & NDS32_16BIT_INSTRUCTION)
ret = do_16((inst >> 16) & 0xffff, regs);
else
ret = do_32(inst, regs);
- set_fs(seg);
+ force_uaccess_end(seg);
return ret;
}
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 8fb73f6401a0..f02524eb6d56 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr,
if (unlikely(faulthandler_disabled() || !mm))
goto no_context;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -206,7 +208,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, addr, flags);
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
@@ -228,22 +230,7 @@ good_area:
goto bad_area;
}
- /*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
- */
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index e83f831a76f9..a741abbed6fb 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -30,7 +30,7 @@
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(seg) (current_thread_info()->addr_limit = (seg))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define __access_ok(addr, len) \
(((signed long)(((long)get_fs().seg) & \
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 4112ef0e247e..9476feecf512 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -24,6 +24,7 @@
#include <linux/mm.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/traps.h>
@@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
if (!mmap_read_trylock(mm)) {
if (!user_mode(regs) && !search_exception_tables(regs->ea))
goto bad_area_nosemaphore;
@@ -131,7 +134,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -146,16 +149,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h
index db02fb2077d9..7d6b4a77b379 100644
--- a/arch/openrisc/include/asm/io.h
+++ b/arch/openrisc/include/asm/io.h
@@ -14,6 +14,8 @@
#ifndef __ASM_OPENRISC_IO_H
#define __ASM_OPENRISC_IO_H
+#include <linux/types.h>
+
/*
* PCI: can we really do 0 here if we have no port IO?
*/
@@ -25,9 +27,12 @@
#define PIO_OFFSET 0
#define PIO_MASK 0
-#include <asm-generic/io.h>
-
+#define ioremap ioremap
void __iomem *ioremap(phys_addr_t offset, unsigned long size);
+
+#define iounmap iounmap
extern void iounmap(void *addr);
+#include <asm-generic/io.h>
+
#endif
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 17c24f14615f..f0390211236b 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -43,21 +43,22 @@
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b) ((a) == (b))
+#define uaccess_kernel() (get_fs() == KERNEL_DS)
/* Ensure that the range from addr to addr+size is all within the process'
* address space
*/
-#define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs()-size))
+static inline int __range_ok(unsigned long addr, unsigned long size)
+{
+ const mm_segment_t fs = get_fs();
-/* Ensure that addr is below task's addr_limit */
-#define __addr_ok(addr) ((unsigned long) addr < get_fs())
+ return size <= fs && addr <= (fs - size);
+}
#define access_ok(addr, size) \
({ \
- unsigned long __ao_addr = (unsigned long)(addr); \
- unsigned long __ao_size = (unsigned long)(size); \
- __range_ok(__ao_addr, __ao_size); \
+ __chk_user_ptr(addr); \
+ __range_ok((unsigned long)(addr), (size)); \
})
/*
@@ -100,7 +101,7 @@ extern long __put_user_bad(void);
#define __put_user_check(x, ptr, size) \
({ \
long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) *__pu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
if (access_ok(__pu_addr, size)) \
__put_user_size((x), __pu_addr, (size), __pu_err); \
__pu_err; \
@@ -173,7 +174,7 @@ struct __large_struct {
#define __get_user_check(x, ptr, size) \
({ \
long __gu_err = -EFAULT, __gu_val = 0; \
- const __typeof__(*(ptr)) * __gu_addr = (ptr); \
+ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
if (access_ok(__gu_addr, size)) \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -241,17 +242,17 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long size)
return __copy_tofrom_user(to, (__force const void *)from, size);
}
static inline unsigned long
-raw_copy_to_user(void *to, const void __user *from, unsigned long size)
+raw_copy_to_user(void __user *to, const void *from, unsigned long size)
{
return __copy_tofrom_user((__force void *)to, from, size);
}
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
-extern unsigned long __clear_user(void *addr, unsigned long size);
+extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline __must_check unsigned long
-clear_user(void *addr, unsigned long size)
+clear_user(void __user *addr, unsigned long size)
{
if (likely(access_ok(addr, size)))
size = __clear_user(addr, size);
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 8aa438e1f51f..b18e775f8be3 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -292,13 +292,15 @@ void __init setup_arch(char **cmdline_p)
init_mm.brk = (unsigned long)_end;
#ifdef CONFIG_BLK_DEV_INITRD
- initrd_start = (unsigned long)&__initrd_start;
- initrd_end = (unsigned long)&__initrd_end;
if (initrd_start == initrd_end) {
+ printk(KERN_INFO "Initial ramdisk not found\n");
initrd_start = 0;
initrd_end = 0;
+ } else {
+ printk(KERN_INFO "Initial ramdisk at: 0x%p (%lu bytes)\n",
+ (void *)(initrd_start), initrd_end - initrd_start);
+ initrd_below_start_ok = 1;
}
- initrd_below_start_ok = 1;
#endif
/* setup memblock allocator */
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 4f0754874d78..97804f21a40c 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -68,7 +68,7 @@ static int restore_sigcontext(struct pt_regs *regs,
asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
{
- struct rt_sigframe *frame = (struct rt_sigframe __user *)regs->sp;
+ struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->sp;
sigset_t set;
/*
@@ -76,7 +76,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
* then frame should be dword aligned here. If it's
* not, then the user is trying to mess with us.
*/
- if (((long)frame) & 3)
+ if (((unsigned long)frame) & 3)
goto badframe;
if (!access_ok(frame, sizeof(*frame)))
@@ -151,7 +151,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
- struct rt_sigframe *frame;
+ struct rt_sigframe __user *frame;
unsigned long return_ip;
int err = 0;
@@ -181,10 +181,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
l.ori r11,r0,__NR_sigreturn
l.sys 1
*/
- err |= __put_user(0xa960, (short *)(frame->retcode + 0));
- err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2));
- err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4));
- err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8));
+ err |= __put_user(0xa960, (short __user *)(frame->retcode + 0));
+ err |= __put_user(__NR_rt_sigreturn, (short __user *)(frame->retcode + 2));
+ err |= __put_user(0x20000001, (unsigned long __user *)(frame->retcode + 4));
+ err |= __put_user(0x15000000, (unsigned long __user *)(frame->retcode + 8));
if (err)
return -EFAULT;
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index bd1e660bbc89..29c82ef2e207 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -219,30 +219,99 @@ static inline void ipi_flush_tlb_all(void *ignored)
local_flush_tlb_all();
}
+static inline void ipi_flush_tlb_mm(void *info)
+{
+ struct mm_struct *mm = (struct mm_struct *)info;
+
+ local_flush_tlb_mm(mm);
+}
+
+static void smp_flush_tlb_mm(struct cpumask *cmask, struct mm_struct *mm)
+{
+ unsigned int cpuid;
+
+ if (cpumask_empty(cmask))
+ return;
+
+ cpuid = get_cpu();
+
+ if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+ /* local cpu is the only cpu present in cpumask */
+ local_flush_tlb_mm(mm);
+ } else {
+ on_each_cpu_mask(cmask, ipi_flush_tlb_mm, mm, 1);
+ }
+ put_cpu();
+}
+
+struct flush_tlb_data {
+ unsigned long addr1;
+ unsigned long addr2;
+};
+
+static inline void ipi_flush_tlb_page(void *info)
+{
+ struct flush_tlb_data *fd = (struct flush_tlb_data *)info;
+
+ local_flush_tlb_page(NULL, fd->addr1);
+}
+
+static inline void ipi_flush_tlb_range(void *info)
+{
+ struct flush_tlb_data *fd = (struct flush_tlb_data *)info;
+
+ local_flush_tlb_range(NULL, fd->addr1, fd->addr2);
+}
+
+static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start,
+ unsigned long end)
+{
+ unsigned int cpuid;
+
+ if (cpumask_empty(cmask))
+ return;
+
+ cpuid = get_cpu();
+
+ if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+ /* local cpu is the only cpu present in cpumask */
+ if ((end - start) <= PAGE_SIZE)
+ local_flush_tlb_page(NULL, start);
+ else
+ local_flush_tlb_range(NULL, start, end);
+ } else {
+ struct flush_tlb_data fd;
+
+ fd.addr1 = start;
+ fd.addr2 = end;
+
+ if ((end - start) <= PAGE_SIZE)
+ on_each_cpu_mask(cmask, ipi_flush_tlb_page, &fd, 1);
+ else
+ on_each_cpu_mask(cmask, ipi_flush_tlb_range, &fd, 1);
+ }
+ put_cpu();
+}
+
void flush_tlb_all(void)
{
on_each_cpu(ipi_flush_tlb_all, NULL, 1);
}
-/*
- * FIXME: implement proper functionality instead of flush_tlb_all.
- * *But*, as things currently stands, the local_tlb_flush_* functions will
- * all boil down to local_tlb_flush_all anyway.
- */
void flush_tlb_mm(struct mm_struct *mm)
{
- on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+ smp_flush_tlb_mm(mm_cpumask(mm), mm);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
{
- on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+ smp_flush_tlb_range(mm_cpumask(vma->vm_mm), uaddr, uaddr + PAGE_SIZE);
}
void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+ smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end);
}
/* Instruction cache invalidate - performed on each cpu */
diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c
index 43f140a28bc7..54d38809e22c 100644
--- a/arch/openrisc/kernel/stacktrace.c
+++ b/arch/openrisc/kernel/stacktrace.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/processor.h>
@@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
unsigned long *sp = NULL;
+ if (!try_get_task_stack(tsk))
+ return;
+
if (tsk == current)
sp = (unsigned long *) &sp;
- else
- sp = (unsigned long *) KSTK_ESP(tsk);
+ else {
+ unsigned long ksp;
+
+ /* Locate stack from kernel context */
+ ksp = task_thread_info(tsk)->ksp;
+ ksp += STACK_FRAME_OVERHEAD; /* redzone */
+ ksp += sizeof(struct pt_regs);
+
+ sp = (unsigned long *) ksp;
+ }
unwind_stack(trace, sp, save_stack_address_nosched);
+
+ put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index 60449fd7f16f..22fbc5fb24b3 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -96,18 +96,6 @@ SECTIONS
__init_end = .;
- . = ALIGN(PAGE_SIZE);
- .initrd : AT(ADDR(.initrd) - LOAD_OFFSET)
- {
- __initrd_start = .;
- *(.initrd)
- __initrd_end = .;
- FILL (0);
- . = ALIGN (PAGE_SIZE);
- }
-
- __vmlinux_end = .; /* last address of the physical file */
-
BSS_SECTION(0, 0, 0x20)
_end = .;
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index d2224ccca294..ca97d9baab51 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -15,6 +15,7 @@
#include <linux/interrupt.h>
#include <linux/extable.h>
#include <linux/sched/signal.h>
+#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/siginfo.h>
@@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_interrupt() || !mm)
goto no_context;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -159,7 +162,7 @@ good_area:
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -176,10 +179,6 @@ good_area:
if (flags & FAULT_FLAG_ALLOW_RETRY) {
/*RGD modeled on Cris */
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c
index 4b680aed8f5f..2b6feabf6381 100644
--- a/arch/openrisc/mm/tlb.c
+++ b/arch/openrisc/mm/tlb.c
@@ -137,21 +137,28 @@ void local_flush_tlb_mm(struct mm_struct *mm)
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *next_tsk)
{
+ unsigned int cpu;
+
+ if (unlikely(prev == next))
+ return;
+
+ cpu = smp_processor_id();
+
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
/* remember the pgd for the fault handlers
* this is similar to the pgd register in some other CPU's.
* we need our own copy of it because current and active_mm
* might be invalid at points where we still need to derefer
* the pgd.
*/
- current_pgd[smp_processor_id()] = next->pgd;
+ current_pgd[cpu] = next->pgd;
/* We don't have context support implemented, so flush all
* entries belonging to previous map
*/
-
- if (prev != next)
- local_flush_tlb_mm(prev);
-
+ local_flush_tlb_mm(prev);
}
/*
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 03862320f779..21b375c67e53 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -34,13 +34,13 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
/* Can't use raw_spin_lock_irq because of #include problems, so
* this is the substitute */
#define _atomic_spin_lock_irqsave(l,f) do { \
- arch_spinlock_t *s = ATOMIC_HASH(l); \
+ arch_spinlock_t *s = ATOMIC_HASH(l); \
local_irq_save(f); \
arch_spin_lock(s); \
} while(0)
#define _atomic_spin_unlock_irqrestore(l,f) do { \
- arch_spinlock_t *s = ATOMIC_HASH(l); \
+ arch_spinlock_t *s = ATOMIC_HASH(l); \
arch_spin_unlock(s); \
local_irq_restore(f); \
} while(0)
@@ -85,7 +85,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v) \
_atomic_spin_lock_irqsave(v, flags); \
v->counter c_op i; \
_atomic_spin_unlock_irqrestore(v, flags); \
-} \
+}
#define ATOMIC_OP_RETURN(op, c_op) \
static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
@@ -148,7 +148,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \
_atomic_spin_lock_irqsave(v, flags); \
v->counter c_op i; \
_atomic_spin_unlock_irqrestore(v, flags); \
-} \
+}
#define ATOMIC64_OP_RETURN(op, c_op) \
static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \
diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
index dbaaca84f27f..640d46edf32e 100644
--- a/arch/parisc/include/asm/barrier.h
+++ b/arch/parisc/include/asm/barrier.h
@@ -26,6 +26,67 @@
#define __smp_rmb() mb()
#define __smp_wmb() mb()
+#define __smp_store_release(p, v) \
+do { \
+ typeof(p) __p = (p); \
+ union { typeof(*p) __val; char __c[1]; } __u = \
+ { .__val = (__force typeof(*p)) (v) }; \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile("stb,ma %0,0(%1)" \
+ : : "r"(*(__u8 *)__u.__c), "r"(__p) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile("sth,ma %0,0(%1)" \
+ : : "r"(*(__u16 *)__u.__c), "r"(__p) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile("stw,ma %0,0(%1)" \
+ : : "r"(*(__u32 *)__u.__c), "r"(__p) \
+ : "memory"); \
+ break; \
+ case 8: \
+ if (IS_ENABLED(CONFIG_64BIT)) \
+ asm volatile("std,ma %0,0(%1)" \
+ : : "r"(*(__u64 *)__u.__c), "r"(__p) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+#define __smp_load_acquire(p) \
+({ \
+ union { typeof(*p) __val; char __c[1]; } __u; \
+ typeof(p) __p = (p); \
+ compiletime_assert_atomic_type(*p); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile("ldb,ma 0(%1),%0" \
+ : "=r"(*(__u8 *)__u.__c) : "r"(__p) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile("ldh,ma 0(%1),%0" \
+ : "=r"(*(__u16 *)__u.__c) : "r"(__p) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile("ldw,ma 0(%1),%0" \
+ : "=r"(*(__u32 *)__u.__c) : "r"(__p) \
+ : "memory"); \
+ break; \
+ case 8: \
+ if (IS_ENABLED(CONFIG_64BIT)) \
+ asm volatile("ldd,ma 0(%1),%0" \
+ : "=r"(*(__u64 *)__u.__c) : "r"(__p) \
+ : "memory"); \
+ break; \
+ } \
+ __u.__val; \
+})
#include <asm-generic/barrier.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 116effe26143..45e20d38dc59 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -303,8 +303,8 @@ extern void outsl (unsigned long port, const void *src, unsigned long count);
#define ioread64be ioread64be
#define iowrite64 iowrite64
#define iowrite64be iowrite64be
-extern u64 ioread64(void __iomem *addr);
-extern u64 ioread64be(void __iomem *addr);
+extern u64 ioread64(const void __iomem *addr);
+extern u64 ioread64be(const void __iomem *addr);
extern void iowrite64(u64 val, void __iomem *addr);
extern void iowrite64be(u64 val, void __iomem *addr);
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index cc7ecc2ef55d..a6482b2ce0ea 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -10,6 +10,7 @@
#include <asm/cache.h>
+#define __HAVE_ARCH_PMD_ALLOC_ONE
#define __HAVE_ARCH_PMD_FREE
#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h>
@@ -67,6 +68,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
(__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
}
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+}
+
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index ebbb9ffe038c..ed2cd4fb479b 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -14,7 +14,7 @@
#define KERNEL_DS ((mm_segment_t){0})
#define USER_DS ((mm_segment_t){1})
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 1df0f67ed667..4bab21c71055 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -64,7 +64,8 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent,
function_trace_op, regs);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub ||
+ if (dereference_function_descriptor(ftrace_graph_return) !=
+ dereference_function_descriptor(ftrace_stub) ||
ftrace_graph_entry != ftrace_graph_entry_stub) {
unsigned long *parent_rp;
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 292baabefade..def64d221cd4 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -163,7 +163,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index 0195aec657e2..ce400417d54e 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -43,13 +43,13 @@
#endif
struct iomap_ops {
- unsigned int (*read8)(void __iomem *);
- unsigned int (*read16)(void __iomem *);
- unsigned int (*read16be)(void __iomem *);
- unsigned int (*read32)(void __iomem *);
- unsigned int (*read32be)(void __iomem *);
- u64 (*read64)(void __iomem *);
- u64 (*read64be)(void __iomem *);
+ unsigned int (*read8)(const void __iomem *);
+ unsigned int (*read16)(const void __iomem *);
+ unsigned int (*read16be)(const void __iomem *);
+ unsigned int (*read32)(const void __iomem *);
+ unsigned int (*read32be)(const void __iomem *);
+ u64 (*read64)(const void __iomem *);
+ u64 (*read64be)(const void __iomem *);
void (*write8)(u8, void __iomem *);
void (*write16)(u16, void __iomem *);
void (*write16be)(u16, void __iomem *);
@@ -57,9 +57,9 @@ struct iomap_ops {
void (*write32be)(u32, void __iomem *);
void (*write64)(u64, void __iomem *);
void (*write64be)(u64, void __iomem *);
- void (*read8r)(void __iomem *, void *, unsigned long);
- void (*read16r)(void __iomem *, void *, unsigned long);
- void (*read32r)(void __iomem *, void *, unsigned long);
+ void (*read8r)(const void __iomem *, void *, unsigned long);
+ void (*read16r)(const void __iomem *, void *, unsigned long);
+ void (*read32r)(const void __iomem *, void *, unsigned long);
void (*write8r)(void __iomem *, const void *, unsigned long);
void (*write16r)(void __iomem *, const void *, unsigned long);
void (*write32r)(void __iomem *, const void *, unsigned long);
@@ -69,17 +69,17 @@ struct iomap_ops {
#define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff)
-static unsigned int ioport_read8(void __iomem *addr)
+static unsigned int ioport_read8(const void __iomem *addr)
{
return inb(ADDR2PORT(addr));
}
-static unsigned int ioport_read16(void __iomem *addr)
+static unsigned int ioport_read16(const void __iomem *addr)
{
return inw(ADDR2PORT(addr));
}
-static unsigned int ioport_read32(void __iomem *addr)
+static unsigned int ioport_read32(const void __iomem *addr)
{
return inl(ADDR2PORT(addr));
}
@@ -99,17 +99,17 @@ static void ioport_write32(u32 datum, void __iomem *addr)
outl(datum, ADDR2PORT(addr));
}
-static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count)
+static void ioport_read8r(const void __iomem *addr, void *dst, unsigned long count)
{
insb(ADDR2PORT(addr), dst, count);
}
-static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count)
+static void ioport_read16r(const void __iomem *addr, void *dst, unsigned long count)
{
insw(ADDR2PORT(addr), dst, count);
}
-static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count)
+static void ioport_read32r(const void __iomem *addr, void *dst, unsigned long count)
{
insl(ADDR2PORT(addr), dst, count);
}
@@ -150,37 +150,37 @@ static const struct iomap_ops ioport_ops = {
/* Legacy I/O memory ops */
-static unsigned int iomem_read8(void __iomem *addr)
+static unsigned int iomem_read8(const void __iomem *addr)
{
return readb(addr);
}
-static unsigned int iomem_read16(void __iomem *addr)
+static unsigned int iomem_read16(const void __iomem *addr)
{
return readw(addr);
}
-static unsigned int iomem_read16be(void __iomem *addr)
+static unsigned int iomem_read16be(const void __iomem *addr)
{
return __raw_readw(addr);
}
-static unsigned int iomem_read32(void __iomem *addr)
+static unsigned int iomem_read32(const void __iomem *addr)
{
return readl(addr);
}
-static unsigned int iomem_read32be(void __iomem *addr)
+static unsigned int iomem_read32be(const void __iomem *addr)
{
return __raw_readl(addr);
}
-static u64 iomem_read64(void __iomem *addr)
+static u64 iomem_read64(const void __iomem *addr)
{
return readq(addr);
}
-static u64 iomem_read64be(void __iomem *addr)
+static u64 iomem_read64be(const void __iomem *addr)
{
return __raw_readq(addr);
}
@@ -220,7 +220,7 @@ static void iomem_write64be(u64 datum, void __iomem *addr)
__raw_writel(datum, addr);
}
-static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count)
+static void iomem_read8r(const void __iomem *addr, void *dst, unsigned long count)
{
while (count--) {
*(u8 *)dst = __raw_readb(addr);
@@ -228,7 +228,7 @@ static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count)
}
}
-static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count)
+static void iomem_read16r(const void __iomem *addr, void *dst, unsigned long count)
{
while (count--) {
*(u16 *)dst = __raw_readw(addr);
@@ -236,7 +236,7 @@ static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count)
}
}
-static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count)
+static void iomem_read32r(const void __iomem *addr, void *dst, unsigned long count)
{
while (count--) {
*(u32 *)dst = __raw_readl(addr);
@@ -297,49 +297,49 @@ static const struct iomap_ops *iomap_ops[8] = {
};
-unsigned int ioread8(void __iomem *addr)
+unsigned int ioread8(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr);
return *((u8 *)addr);
}
-unsigned int ioread16(void __iomem *addr)
+unsigned int ioread16(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr);
return le16_to_cpup((u16 *)addr);
}
-unsigned int ioread16be(void __iomem *addr)
+unsigned int ioread16be(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr);
return *((u16 *)addr);
}
-unsigned int ioread32(void __iomem *addr)
+unsigned int ioread32(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr);
return le32_to_cpup((u32 *)addr);
}
-unsigned int ioread32be(void __iomem *addr)
+unsigned int ioread32be(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr);
return *((u32 *)addr);
}
-u64 ioread64(void __iomem *addr)
+u64 ioread64(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read64(addr);
return le64_to_cpup((u64 *)addr);
}
-u64 ioread64be(void __iomem *addr)
+u64 ioread64be(const void __iomem *addr)
{
if (unlikely(INDIRECT_ADDR(addr)))
return iomap_ops[ADDR_TO_REGION(addr)]->read64be(addr);
@@ -411,7 +411,7 @@ void iowrite64be(u64 datum, void __iomem *addr)
/* Repeating interfaces */
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
{
if (unlikely(INDIRECT_ADDR(addr))) {
iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count);
@@ -423,7 +423,7 @@ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
}
}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
{
if (unlikely(INDIRECT_ADDR(addr))) {
iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count);
@@ -435,7 +435,7 @@ void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
}
}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
{
if (unlikely(INDIRECT_ADDR(addr))) {
iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 66ac0719bd49..4bfe2da9fbe3 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
+#include <linux/perf_event.h>
#include <asm/traps.h>
@@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
acc_type = parisc_acctyp(code, regs->iir);
if (acc_type & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
mmap_read_lock(mm);
vma = find_vma_prev(mm, address, &prev_vma);
@@ -302,7 +304,7 @@ good_area:
* fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -323,10 +325,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
/*
* No need to mmap_read_unlock(mm) as we would
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 9cb7d8be2366..0a6319448cb6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
struct kvm *kvm, bool skip_page_out);
+int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new);
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old);
#else
static inline int kvmppc_uvmem_init(void)
{
@@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
static inline void
kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
struct kvm *kvm, bool skip_page_out) { }
+
+static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old) { }
+
#endif /* CONFIG_PPC_UV */
#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ccf66b3a4c1d..0a056c64c317 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -59,7 +59,7 @@ enum xlate_readwrite {
};
extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
-extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
extern void kvmppc_handler_highmem(void);
extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 41419f1fc00f..88fb88491fe9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -474,7 +474,8 @@
#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
-#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
+#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */
+#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
@@ -1362,6 +1363,7 @@
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
#define PVR_ARCH_300 0x0f000005
+#define PVR_ARCH_31 0x0f000006
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 64c04ab09112..00699903f1ef 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs)
set_thread_flag(TIF_FSCHECK);
}
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (get_fs().seg)
#ifdef __powerpc64__
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 5ac84efc6ede..9fe4fb3b08aa 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -15,23 +15,23 @@
* Here comes the ppc64 implementation of the IOMAP
* interfaces.
*/
-unsigned int ioread8(void __iomem *addr)
+unsigned int ioread8(const void __iomem *addr)
{
return readb(addr);
}
-unsigned int ioread16(void __iomem *addr)
+unsigned int ioread16(const void __iomem *addr)
{
return readw(addr);
}
-unsigned int ioread16be(void __iomem *addr)
+unsigned int ioread16be(const void __iomem *addr)
{
return readw_be(addr);
}
-unsigned int ioread32(void __iomem *addr)
+unsigned int ioread32(const void __iomem *addr)
{
return readl(addr);
}
-unsigned int ioread32be(void __iomem *addr)
+unsigned int ioread32be(const void __iomem *addr)
{
return readl_be(addr);
}
@@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be);
EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(ioread32be);
#ifdef __powerpc64__
-u64 ioread64(void __iomem *addr)
+u64 ioread64(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64_lo_hi(void __iomem *addr)
+u64 ioread64_lo_hi(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64_hi_lo(void __iomem *addr)
+u64 ioread64_hi_lo(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64be(void __iomem *addr)
+u64 ioread64be(const void __iomem *addr)
{
return readq_be(addr);
}
-u64 ioread64be_lo_hi(void __iomem *addr)
+u64 ioread64be_lo_hi(const void __iomem *addr)
{
return readq_be(addr);
}
-u64 ioread64be_hi_lo(void __iomem *addr)
+u64 ioread64be_hi_lo(const void __iomem *addr)
{
return readq_be(addr);
}
@@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo);
* FIXME! We could make these do EEH handling if we really
* wanted. Not clear if we do.
*/
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsb(addr, dst, count);
}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsw(addr, dst, count);
}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsl(addr, dst, count);
}
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index be9f74546068..c2d737ff2e7b 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -197,7 +197,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7c5a1812a1c3..38ea396a23d6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void)
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID);
- rsvd_lpid = LPID_RSVD;
+
+ /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ rsvd_lpid = LPID_RSVD;
+ else
+ rsvd_lpid = LPID_RSVD_POWER7;
kvmppc_init_lpid(rsvd_lpid + 1);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 777aa5625d5f..22a677b18695 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -161,7 +161,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
return -EINVAL;
/* Read the entry from guest memory */
addr = base + (index * sizeof(rpte));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret) {
if (pte_ret_p)
*pte_ret_p = addr;
@@ -237,7 +239,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Read the table to find the root of the radix tree */
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret)
return ret;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0f83f39a2bd2..4ba06a2a306c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -343,13 +343,18 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
vcpu->arch.pvr = pvr;
}
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* We can (emulate) our own architecture version and anything older */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ host_pcr_bit = PCR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
host_pcr_bit = PCR_ARCH_300;
else if (cpu_has_feature(CPU_FTR_ARCH_207S))
host_pcr_bit = PCR_ARCH_207;
@@ -375,6 +380,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
case PVR_ARCH_300:
guest_pcr_bit = PCR_ARCH_300;
break;
+ case PVR_ARCH_31:
+ guest_pcr_bit = PCR_ARCH_31;
+ break;
default:
return -EINVAL;
}
@@ -2355,7 +2363,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
* to trap and then we emulate them.
*/
vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
- HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -4552,16 +4560,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
switch (change) {
case KVM_MR_CREATE:
- if (kvmppc_uvmem_slot_init(kvm, new))
- return;
- uv_register_mem_slot(kvm->arch.lpid,
- new->base_gfn << PAGE_SHIFT,
- new->npages * PAGE_SIZE,
- 0, new->id);
+ /*
+ * @TODO kvmppc_uvmem_memslot_create() can fail and
+ * return error. Fix this.
+ */
+ kvmppc_uvmem_memslot_create(kvm, new);
break;
case KVM_MR_DELETE:
- uv_unregister_mem_slot(kvm->arch.lpid, old->id);
- kvmppc_uvmem_slot_free(kvm, old);
+ kvmppc_uvmem_memslot_delete(kvm, old);
break;
default:
/* TODO: Handle KVM_MR_MOVE */
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2c849a65db77..6822d23a2da4 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
+ regs_ptr = kvmppc_get_gpr(vcpu, 5);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_PARAMETER;
+
if (kvmppc_need_byteswap(vcpu))
byteswap_hv_regs(&l2_hv);
if (l2_hv.version != HV_GUEST_STATE_VERSION)
return H_P2;
- regs_ptr = kvmppc_get_gpr(vcpu, 5);
- err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
- sizeof(struct pt_regs));
- if (err)
- return H_PARAMETER;
if (kvmppc_need_byteswap(vcpu))
byteswap_pt_regs(&l2_regs);
if (l2_hv.vcpu_token >= NR_CPUS)
@@ -323,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
byteswap_hv_regs(&l2_hv);
byteswap_pt_regs(&l2_regs);
}
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
- if (err)
- return H_AUTHORITY;
- err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_AUTHORITY;
@@ -508,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
goto not_found;
/* Write what was loaded into our buffer back to the L1 guest */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
} else {
/* Load the data to be stored from the L1 guest into our buf */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
@@ -548,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
ret = -EFAULT;
ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
- if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+ if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl_addr,
&ptbl_entry, sizeof(ptbl_entry));
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ }
if (ret) {
gp->l1_gr_to_hr = 0;
gp->process_table = 0;
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 6850bd04bcb9..7705d5557239 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -93,12 +93,133 @@
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
-#define KVMPPC_UVMEM_PFN (1UL << 63)
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ * a Secure VM, the contents of the GFN is not accessible
+ * to the Hypervisor. This GFN can be backed by a secure-PFN,
+ * or can be backed by a normal-PFN with contents encrypted.
+ * The former is true when the GFN is paged-in into the
+ * ultravisor. The latter is true when the GFN is paged-out
+ * of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ * a secure VM. The contents of the GFN is accessible to
+ * Hypervisor. This GFN is backed by a normal-PFN and its
+ * content is un-encrypted.
+ *
+ * (c) Normal - The GFN is a normal. The GFN is associated with
+ * a normal VM. The contents of the GFN is accesible to
+ * the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM: A VM whose contents are always accessible to
+ * the hypervisor. All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ * hypervisor without the VM's consent. Its GFNs are
+ * either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ * The transition starts on successful return of
+ * H_SVM_INIT_START, and ends on successful return
+ * of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ * in any of the three states; i.e Secure-GFN, Shared-GFN,
+ * and Normal-GFN. The VM never executes in this state
+ * in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ * The state of a memory slot mirrors the state of the
+ * VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ * A VM always starts in Normal Mode.
+ *
+ * H_SVM_INIT_START moves the VM into transient state. During this
+ * time the Ultravisor may request some of its GFNs to be shared or
+ * secured. So its GFNs can be in one of the three GFN states.
+ *
+ * H_SVM_INIT_DONE moves the VM entirely from transient state to
+ * secure-state. At this point any left-over normal-GFNs are
+ * transitioned to Secure-GFN.
+ *
+ * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ * All its GFNs are moved to Normal-GFNs.
+ *
+ * UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ * the secure-GFN and shared-GFNs are tranistioned to normal-GFN
+ * Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
+ * | |operation |operation | abort/ | |
+ * | | | | terminate | |
+ * -------------------------------------------------------------
+ * | | | | | |
+ * | Secure | Shared | Secure |Normal |Secure |
+ * | | | | | |
+ * | Shared | Shared | Secure |Normal |Shared |
+ * | | | | | |
+ * | Normal | Shared | Secure |Normal |Secure |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
+ * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
+ * | | | | | | |
+ * --------- ----------------------------------------------------------
+ * | | | | | | |
+ * | Normal | Normal | Transient|Error |Error |Normal |
+ * | | | | | | |
+ * | Secure | Error | Error |Error |Error |Normal |
+ * | | | | | | |
+ * |Transient| N/A | Error |Secure |Normal |Normal |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
+#define KVMPPC_GFN_MEM_PFN (1UL << 62)
+#define KVMPPC_GFN_SHARED (1UL << 61)
+#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
struct kvmppc_uvmem_slot {
struct list_head list;
@@ -106,11 +227,11 @@ struct kvmppc_uvmem_slot {
unsigned long base_pfn;
unsigned long *pfns;
};
-
struct kvmppc_uvmem_page_pvt {
struct kvm *kvm;
unsigned long gpa;
bool skip_page_out;
+ bool remove_gfn;
};
bool kvmppc_uvmem_available(void)
@@ -163,8 +284,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
mutex_unlock(&kvm->arch.uvmem_lock);
}
-static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
- struct kvm *kvm)
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long flag, unsigned long uvmem_pfn)
{
struct kvmppc_uvmem_slot *p;
@@ -172,24 +293,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+ if (flag == KVMPPC_GFN_UVMEM_PFN)
+ p->pfns[index] = uvmem_pfn | flag;
+ else
+ p->pfns[index] = flag;
return;
}
}
}
-static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+ unsigned long uvmem_pfn, struct kvm *kvm)
{
- struct kvmppc_uvmem_slot *p;
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
- if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
- p->pfns[gfn - p->base_pfn] = 0;
- return;
- }
- }
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, 0, 0);
}
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
unsigned long *uvmem_pfn)
{
@@ -199,10 +337,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+ if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
if (uvmem_pfn)
*uvmem_pfn = p->pfns[index] &
- ~KVMPPC_UVMEM_PFN;
+ KVMPPC_GFN_PFN_MASK;
return true;
} else
return false;
@@ -211,10 +349,114 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
return false;
}
+/*
+ * starting from *gfn search for the next available GFN that is not yet
+ * transitioned to a secure GFN. return the value of that GFN in *gfn. If a
+ * GFN is found, return true, else return false
+ *
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+{
+ struct kvmppc_uvmem_slot *p;
+ bool ret = false;
+ unsigned long i;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
+ if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+ break;
+ if (!p)
+ return ret;
+ /*
+ * The code below assumes, one to one correspondence between
+ * kvmppc_uvmem_slot and memslot.
+ */
+ for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) {
+ unsigned long index = i - p->base_pfn;
+
+ if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) {
+ *gfn = i;
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot, bool merge)
+{
+ unsigned long gfn = memslot->base_gfn;
+ unsigned long end, start = gfn_to_hva(kvm, gfn);
+ int ret = 0;
+ struct vm_area_struct *vma;
+ int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+ if (kvm_is_error_hva(start))
+ return H_STATE;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+
+ mmap_write_lock(kvm->mm);
+ do {
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma) {
+ ret = H_STATE;
+ break;
+ }
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ merge_flag, &vma->vm_flags);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+ start = vma->vm_end;
+ } while (end > vma->vm_end);
+
+ mmap_write_unlock(kvm->mm);
+ return ret;
+}
+
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+}
+
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ int ret = H_PARAMETER;
+
+ if (kvmppc_memslot_page_merge(kvm, memslot, false))
+ return ret;
+
+ if (kvmppc_uvmem_slot_init(kvm, memslot))
+ goto out1;
+
+ ret = uv_register_mem_slot(kvm->arch.lpid,
+ memslot->base_gfn << PAGE_SHIFT,
+ memslot->npages * PAGE_SIZE,
+ 0, memslot->id);
+ if (ret < 0) {
+ ret = H_PARAMETER;
+ goto out;
+ }
+ return 0;
+out:
+ kvmppc_uvmem_slot_free(kvm, memslot);
+out1:
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+ return ret;
+}
+
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
+ struct kvm_memory_slot *memslot, *m;
int ret = H_SUCCESS;
int srcu_idx;
@@ -232,35 +474,117 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
return H_AUTHORITY;
srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ /* register the memslot */
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- if (kvmppc_uvmem_slot_init(kvm, memslot)) {
- ret = H_PARAMETER;
- goto out;
- }
- ret = uv_register_mem_slot(kvm->arch.lpid,
- memslot->base_gfn << PAGE_SHIFT,
- memslot->npages * PAGE_SIZE,
- 0, memslot->id);
- if (ret < 0) {
- kvmppc_uvmem_slot_free(kvm, memslot);
- ret = H_PARAMETER;
- goto out;
+ ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(m, slots) {
+ if (m == memslot)
+ break;
+ __kvmppc_uvmem_memslot_delete(kvm, memslot);
}
}
-out:
+
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ * Caller must held kvm->arch.uvmem_lock.
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
{
- if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
- return H_UNSUPPORTED;
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
- kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
- pr_info("LPID %d went secure\n", kvm->arch.lpid);
- return H_SUCCESS;
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+ mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return -1;
+
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+ else {
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
+{
+ int ret;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return ret;
}
/*
@@ -271,33 +595,53 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
* fault on them, do fault time migration to replace the device PTEs in
* QEMU page table with normal PTEs from newly allocated pages.
*/
-void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
struct kvm *kvm, bool skip_page_out)
{
int i;
struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn, uvmem_pfn;
- unsigned long gfn = free->base_gfn;
+ struct page *uvmem_page;
+ struct vm_area_struct *vma = NULL;
+ unsigned long uvmem_pfn, gfn;
+ unsigned long addr;
- for (i = free->npages; i; --i, ++gfn) {
- struct page *uvmem_page;
+ mmap_read_lock(kvm->mm);
+
+ addr = slot->userspace_addr;
+
+ gfn = slot->base_gfn;
+ for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {
+
+ /* Fetch the VMA if addr is not in the latest fetched one */
+ if (!vma || addr >= vma->vm_end) {
+ vma = find_vma_intersection(kvm->mm, addr, addr+1);
+ if (!vma) {
+ pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+ break;
+ }
+ }
mutex_lock(&kvm->arch.uvmem_lock);
- if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
- mutex_unlock(&kvm->arch.uvmem_lock);
- continue;
+
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ pvt->remove_gfn = true;
+
+ if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+ PAGE_SHIFT, kvm, pvt->gpa))
+ pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+ pvt->gpa, addr);
+ } else {
+ /* Remove the shared flag if any */
+ kvmppc_gfn_remove(gfn, kvm);
}
- uvmem_page = pfn_to_page(uvmem_pfn);
- pvt = uvmem_page->zone_device_data;
- pvt->skip_page_out = skip_page_out;
mutex_unlock(&kvm->arch.uvmem_lock);
-
- pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_noslot_pfn(pfn))
- continue;
- kvm_release_pfn_clean(pfn);
}
+
+ mmap_read_unlock(kvm->mm);
}
unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
@@ -360,7 +704,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
goto out_clear;
uvmem_pfn = bit + pfn_first;
- kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+ kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
pvt->gpa = gpa;
pvt->kvm = kvm;
@@ -379,13 +723,14 @@ out:
}
/*
- * Alloc a PFN from private device memory pool and copy page from normal
- * memory to secure memory using UV_PAGE_IN uvcall.
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
*/
-static int
-kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long gpa, struct kvm *kvm,
- unsigned long page_shift, bool *downgrade)
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift,
+ bool pagein)
{
unsigned long src_pfn, dst_pfn = 0;
struct migrate_vma mig;
@@ -402,18 +747,6 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
mig.dst = &dst_pfn;
mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
- /*
- * We come here with mmap_lock write lock held just for
- * ksm_madvise(), otherwise we only need read mmap_lock.
- * Hence downgrade to read lock once ksm_madvise() is done.
- */
- ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags);
- mmap_write_downgrade(kvm->mm);
- *downgrade = true;
- if (ret)
- return ret;
-
ret = migrate_vma_setup(&mig);
if (ret)
return ret;
@@ -429,11 +762,16 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
goto out_finalize;
}
- pfn = *mig.src >> MIGRATE_PFN_SHIFT;
- spage = migrate_pfn_to_page(*mig.src);
- if (spage)
- uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
- page_shift);
+ if (pagein) {
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage) {
+ ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+ if (ret)
+ goto out_finalize;
+ }
+ }
*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
migrate_vma_pages(&mig);
@@ -442,6 +780,80 @@ out_finalize:
return ret;
}
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long gfn = memslot->base_gfn;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+ int ret = 0;
+
+ mmap_read_lock(kvm->mm);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+ ret = H_STATE;
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ break;
+
+ end = start + (1UL << PAGE_SHIFT);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ break;
+
+ ret = kvmppc_svm_page_in(vma, start, end,
+ (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+
+ /* relinquish the cpu if needed */
+ cond_resched();
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ mmap_read_unlock(kvm->mm);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+ long ret = H_SUCCESS;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* migrate any unmoved normal pfn to device pfns*/
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+ if (ret) {
+ /*
+ * The pages will remain transitioned.
+ * Its the callers responsibility to
+ * terminate the VM, which will undo
+ * all state of the VM. Till then
+ * this VM is in a erroneous state.
+ * Its KVMPPC_SECURE_INIT_DONE will
+ * remain unset.
+ */
+ ret = H_STATE;
+ goto out;
+ }
+ }
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %d went secure\n", kvm->arch.lpid);
+
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
/*
* Shares the page with HV, thus making it a normal page.
*
@@ -451,8 +863,8 @@ out_finalize:
* In the former case, uses dev_pagemap_ops.migrate_to_ram handler
* to unmap the device page from QEMU's page tables.
*/
-static unsigned long
-kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long page_shift)
{
int ret = H_PARAMETER;
@@ -469,6 +881,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ /*
+ * do not drop the GFN. It is a valid GFN
+ * that is transitioned to a shared GFN.
+ */
+ pvt->remove_gfn = false;
}
retry:
@@ -482,12 +899,16 @@ retry:
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ pvt->remove_gfn = false; /* it continues to be a valid GFN */
kvm_release_pfn_clean(pfn);
goto retry;
}
- if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+ if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+ page_shift)) {
+ kvmppc_gfn_shared(gfn, kvm);
ret = H_SUCCESS;
+ }
kvm_release_pfn_clean(pfn);
mutex_unlock(&kvm->arch.uvmem_lock);
out:
@@ -501,11 +922,10 @@ out:
* H_PAGE_IN_SHARED flag makes the page shared which means that the same
* memory in is visible from both UV and HV.
*/
-unsigned long
-kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
- unsigned long flags, unsigned long page_shift)
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags,
+ unsigned long page_shift)
{
- bool downgrade = false;
unsigned long start, end;
struct vm_area_struct *vma;
int srcu_idx;
@@ -526,7 +946,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
ret = H_PARAMETER;
srcu_idx = srcu_read_lock(&kvm->srcu);
- mmap_write_lock(kvm->mm);
+ mmap_read_lock(kvm->mm);
start = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(start))
@@ -542,97 +962,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
if (!vma || vma->vm_start > start || vma->vm_end < end)
goto out_unlock;
- if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
- &downgrade))
- ret = H_SUCCESS;
+ if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ true))
+ goto out_unlock;
+
+ ret = H_SUCCESS;
+
out_unlock:
mutex_unlock(&kvm->arch.uvmem_lock);
out:
- if (downgrade)
- mmap_read_unlock(kvm->mm);
- else
- mmap_write_unlock(kvm->mm);
+ mmap_read_unlock(kvm->mm);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-/*
- * Provision a new page on HV side and copy over the contents
- * from secure memory using UV_PAGE_OUT uvcall.
- */
-static int
-kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long page_shift,
- struct kvm *kvm, unsigned long gpa)
-{
- unsigned long src_pfn, dst_pfn = 0;
- struct migrate_vma mig;
- struct page *dpage, *spage;
- struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn;
- int ret = U_SUCCESS;
-
- memset(&mig, 0, sizeof(mig));
- mig.vma = vma;
- mig.start = start;
- mig.end = end;
- mig.src = &src_pfn;
- mig.dst = &dst_pfn;
- mig.pgmap_owner = &kvmppc_uvmem_pgmap;
- mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
- mutex_lock(&kvm->arch.uvmem_lock);
- /* The requested page is already paged-out, nothing to do */
- if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
- goto out;
-
- ret = migrate_vma_setup(&mig);
- if (ret)
- goto out;
-
- spage = migrate_pfn_to_page(*mig.src);
- if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
- goto out_finalize;
-
- if (!is_zone_device_page(spage))
- goto out_finalize;
-
- dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
- if (!dpage) {
- ret = -1;
- goto out_finalize;
- }
-
- lock_page(dpage);
- pvt = spage->zone_device_data;
- pfn = page_to_pfn(dpage);
-
- /*
- * This function is used in two cases:
- * - When HV touches a secure page, for which we do UV_PAGE_OUT
- * - When a secure page is converted to shared page, we *get*
- * the page to essentially unmap the device page. In this
- * case we skip page-out.
- */
- if (!pvt->skip_page_out)
- ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
- gpa, 0, page_shift);
-
- if (ret == U_SUCCESS)
- *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
- else {
- unlock_page(dpage);
- __free_page(dpage);
- goto out_finalize;
- }
-
- migrate_vma_pages(&mig);
-out_finalize:
- migrate_vma_finalize(&mig);
-out:
- mutex_unlock(&kvm->arch.uvmem_lock);
- return ret;
-}
/*
* Fault handler callback that gets called when HV touches any page that
@@ -657,7 +1000,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
/*
* Release the device PFN back to the pool
*
- * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
+ * Gets called when secure GFN tranistions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
* Gets called with kvm->arch.uvmem_lock held.
*/
static void kvmppc_uvmem_page_free(struct page *page)
@@ -672,7 +1016,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
pvt = page->zone_device_data;
page->zone_device_data = NULL;
- kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ if (pvt->remove_gfn)
+ kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ else
+ kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
kfree(pvt);
}
@@ -744,6 +1091,21 @@ out:
return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
}
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+ int ret = __kvmppc_uvmem_memslot_create(kvm, new);
+
+ if (!ret)
+ ret = kvmppc_uv_migrate_mem_slot(kvm, new);
+
+ return ret;
+}
+
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+ __kvmppc_uvmem_memslot_delete(kvm, old);
+}
+
static u64 kvmppc_get_secmem_size(void)
{
struct device_node *np;
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 607a9b99c334..25a3679fb590 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -55,8 +55,7 @@
****************************************************************************/
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
@@ -68,8 +67,8 @@ kvm_start_entry:
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
- /* Save r3 (kvm_run) and r4 (vcpu) */
- SAVE_2GPRS(3, r1)
+ /* Save r3 (vcpu) */
+ SAVE_GPR(3, r1)
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
@@ -82,47 +81,46 @@ kvm_start_entry:
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
#ifdef CONFIG_PPC_BOOK3S_64
/* Get the dcbz32 flag */
- PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl r3, r3, 0, 63 /* r3 &= 1 */
- stb r3, HSTATE_RESTORE_HID5(r13)
+ PPC_LL r0, VCPU_HFLAGS(r3)
+ rldicl r0, r0, 0, 63 /* r3 &= 1 */
+ stb r0, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
- lwz r3, VCPU_SHAREDBE(r4)
- cmpwi r3, 0
- ld r5, VCPU_SHARED(r4)
+ lbz r4, VCPU_SHAREDBE(r3)
+ cmpwi r4, 0
+ ld r5, VCPU_SHARED(r3)
beq sprg3_little_endian
sprg3_big_endian:
#ifdef __BIG_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
b after_sprg3_load
sprg3_little_endian:
#ifdef __LITTLE_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
after_sprg3_load:
- mtspr SPRN_SPRG3, r3
+ mtspr SPRN_SPRG3, r4
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
+ PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
bl FUNC(kvmppc_entry_trampoline)
@@ -146,7 +144,7 @@ after_sprg3_load:
*
*/
- PPC_LL r3, GPR4(r1) /* vcpu pointer */
+ PPC_LL r3, GPR3(r1) /* vcpu pointer */
/*
* kvmppc_copy_from_svcpu can clobber volatile registers, save
@@ -169,7 +167,7 @@ after_sprg3_load:
#endif /* CONFIG_PPC_BOOK3S_64 */
/* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
+ PPC_LL r7, GPR3(r1)
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
@@ -190,11 +188,11 @@ after_sprg3_load:
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
- /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- lwz r5, VCPU_TRAP(r7)
+ /* Pass the exit number as 2nd argument to kvmppc_handle_exit */
+ lwz r4, VCPU_TRAP(r7)
- /* Restore r3 (kvm_run) and r4 (vcpu) */
- REST_2GPRS(3, r1)
+ /* Restore r3 (vcpu) */
+ REST_GPR(3, r1)
bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
@@ -223,11 +221,11 @@ kvm_loop_heavyweight:
PPC_LL r4, _LINK(r1)
PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
- /* Load vcpu and cpu_run */
- REST_2GPRS(3, r1)
+ /* Load vcpu */
+ REST_GPR(3, r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
@@ -235,7 +233,7 @@ kvm_loop_heavyweight:
kvm_loop_lightweight:
/* We'll need the vcpu pointer */
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ed12dfbf9bb5..88fac22fbf09 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
return r;
}
-int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
@@ -1826,12 +1826,11 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
int ret;
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = -EINVAL;
goto out;
}
@@ -1858,7 +1857,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
kvmppc_clear_debug(vcpu);
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 26b25994c969..c5e677508d3b 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
*/
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto fail;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c0d62a917e20..3e1c9f08e302 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
int ret, s;
struct debug_reg debug;
if (!vcpu->arch.sane) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
@@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
/* No need for guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
@@ -982,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
int idx;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 2e56ab5a5f55..6fa82efe833b 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host)
/* Switch to kernel stack and jump to handler. */
LOAD_REG_ADDR(r3, kvmppc_handle_exit)
mtctr r3
- lwz r3, HOST_RUN(r1)
+ mr r3, r4
lwz r2, HOST_R2(r1)
mr r14, r4 /* Save vcpu pointer. */
@@ -337,15 +337,14 @@ heavyweight_exit:
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- stw r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
stw r3, HOST_STACK_LR(r1)
mfcr r5
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index c577ba4b3169..8262c14fc9e6 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host)
#endif
/* Switch to kernel stack and jump to handler. */
- PPC_LL r3, HOST_RUN(r1)
+ mr r3, r4
mr r5, r14 /* intno */
mr r14, r4 /* Save vcpu pointer. */
+ mr r4, r5
bl kvmppc_handle_exit
/* Restore vcpu pointer and the nonvolatiles we used. */
@@ -525,15 +526,14 @@ heavyweight_exit:
blr
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- PPC_STL r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
mfcr r5
PPC_STL r3, HOST_STACK_LR(r1)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index aaa7b62f2f82..13999123b735 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
- if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (rc)
return EMULATE_DO_MMIO;
return EMULATE_DONE;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 1478fceeb683..1da9dbba9217 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1115,9 +1115,8 @@ void hash__early_init_mmu_secondary(void)
&& cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
-#ifdef CONFIG_PPC_MEM_KEYS
- mtspr(SPRN_UAMOR, default_uamor);
-#endif
+ if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY))
+ mtspr(SPRN_UAMOR, default_uamor);
}
#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 69a6b87f2bb4..b1d091a97611 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -73,12 +73,6 @@ static int scan_pkey_feature(void)
if (early_radix_enabled())
return 0;
- /*
- * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
- */
- if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
- return 0;
-
ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
if (ret == 0) {
/*
@@ -124,6 +118,12 @@ void __init pkey_early_init_devtree(void)
__builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
!= (sizeof(u64) * BITS_PER_BYTE));
+ /*
+ * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+ */
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
+
/* scan the device tree for pkey feature */
pkeys_total = scan_pkey_feature();
if (!pkeys_total)
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index b83abbead4a2..8acd00178956 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
BUG();
}
- if (*flt & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
-
out_unlock:
mmap_read_unlock(mm);
return ret;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 925a7231abb3..0add963a849b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -511,7 +511,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
major |= fault & VM_FAULT_MAJOR;
@@ -537,14 +537,9 @@ retry:
/*
* Major/minor page fault accounting.
*/
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ if (major)
cmo_account_page_fault();
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
+
return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 8ce9d607b53d..f56c66b3f5fe 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -8,6 +8,8 @@
#ifndef _ASM_RISCV_UACCESS_H
#define _ASM_RISCV_UACCESS_H
+#include <asm/pgtable.h> /* for TASK_SIZE */
+
/*
* User space memory access functions
*/
@@ -62,11 +64,9 @@ static inline void set_fs(mm_segment_t fs)
current_thread_info()->addr_limit = fs;
}
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (get_fs().seg)
-
/**
* access_ok: - Checks if a user space pointer is valid
* @addr: User space pointer to start of block to check
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index 3099362d9f26..f839f16e0d2a 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -60,7 +60,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
/*
* The purpose of csr_read(CSR_TIME) is to trap the system into
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index d0c5c316e9bb..0a4e81b8dc79 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -77,16 +77,10 @@ relocate:
csrw CSR_SATP, a0
.align 2
1:
- /* Set trap vector to exception handler */
- la a0, handle_exception
+ /* Set trap vector to spin forever to help debug */
+ la a0, .Lsecondary_park
csrw CSR_TVEC, a0
- /*
- * Set sup0 scratch register to 0, indicating to exception vector that
- * we are presently executing in kernel.
- */
- csrw CSR_SCRATCH, zero
-
/* Reload the global pointer */
.option push
.option norelax
@@ -144,9 +138,23 @@ secondary_start_common:
la a0, swapper_pg_dir
call relocate
#endif
+ call setup_trap_vector
tail smp_callin
#endif /* CONFIG_SMP */
+.align 2
+setup_trap_vector:
+ /* Set trap vector to exception handler */
+ la a0, handle_exception
+ csrw CSR_TVEC, a0
+
+ /*
+ * Set sup0 scratch register to 0, indicating to exception vector that
+ * we are presently executing in kernel.
+ */
+ csrw CSR_SCRATCH, zero
+ ret
+
.Lsecondary_park:
/* We lack SMP support or have too many harts, so park this hart */
wfi
@@ -240,6 +248,7 @@ clear_bss_done:
call relocate
#endif /* CONFIG_MMU */
+ call setup_trap_vector
/* Restore C environment */
la tp, init_task
sw zero, TASK_TI_CPU(tp)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 5873835a3e6b..716d64e36f83 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -109,7 +109,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, addr, flags);
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
@@ -127,21 +127,7 @@ good_area:
BUG();
}
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, addr);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, addr);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e63940bb57cd..8b98c501142d 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
-obj-$(CONFIG_NUMA) += numa/
obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8c0b52940165..3d86e12e8e3c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -126,7 +126,6 @@ config S390
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC
- select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
@@ -766,6 +765,7 @@ config VFIO_AP
def_tristate n
prompt "VFIO support for AP devices"
depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+ depends on ZCRYPT
help
This driver grants access to Adjunct Processor (AP) devices
via the VFIO mediated device interface.
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index cae473a7b6f7..11c5952e1afa 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -45,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v)
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic_add_const(i, &v->counter);
return;
}
@@ -112,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
static inline void atomic64_add(s64 i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic64_add_const(i, (long *)&v->counter);
return;
}
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 17a26261f288..c1b82bcc017c 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2000
+ * Copyright IBM Corp. 1999, 2020
*/
#ifndef DEBUG_H
#define DEBUG_H
@@ -26,19 +26,14 @@
#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
/* the entry information */
-#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */
+#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */
struct __debug_entry {
- union {
- struct {
- unsigned long clock : 52;
- unsigned long exception : 1;
- unsigned long level : 3;
- unsigned long cpuid : 8;
- } fields;
- unsigned long stck;
- } id;
+ unsigned long clock : 60;
+ unsigned long exception : 1;
+ unsigned long level : 3;
void *caller;
+ unsigned short cpu;
} __packed;
typedef struct __debug_entry debug_entry_t;
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 324438889fe1..f09444d6aeab 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -32,7 +32,7 @@
#define USER_DS_SACF (3)
#define get_fs() (current->thread.mm_segment)
-#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
+#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS)
void set_fs(mm_segment_t fs);
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index a8f136943deb..efca70970761 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -49,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index beb4b44a11d1..b6619ae9a3e0 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2012
+ * Copyright IBM Corp. 1999, 2020
*
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
* Holger Smolinski (Holger.Smolinski@de.ibm.com)
@@ -433,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info)
act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
[p_info->act_page] + p_info->act_entry);
- if (act_entry->id.stck == 0LL)
+ if (act_entry->clock == 0LL)
goto out; /* empty entry */
if (view->header_proc)
len += view->header_proc(id_snap, view, p_info->act_area,
@@ -829,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id)
static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
int level, int exception)
{
- active->id.stck = get_tod_clock_fast() -
- *(unsigned long long *) &tod_clock_base[1];
- active->id.fields.cpuid = smp_processor_id();
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ unsigned long timestamp;
+
+ get_tod_clock_ext(clk);
+ timestamp = *(unsigned long *) &clk[0] >> 4;
+ timestamp -= TOD_UNIX_EPOCH >> 12;
+ active->clock = timestamp;
+ active->cpu = smp_processor_id();
active->caller = __builtin_return_address(0);
- active->id.fields.exception = exception;
- active->id.fields.level = level;
+ active->exception = exception;
+ active->level = level;
proceed_active_entry(id);
if (exception)
proceed_active_area(id);
@@ -1398,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
int area, debug_entry_t *entry, char *out_buf)
{
- unsigned long base, sec, usec;
+ unsigned long sec, usec;
unsigned long caller;
unsigned int level;
char *except_str;
int rc = 0;
- level = entry->id.fields.level;
- base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
- sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
+ level = entry->level;
+ sec = entry->clock;
usec = do_div(sec, USEC_PER_SEC);
- if (entry->id.fields.exception)
+ if (entry->exception)
except_str = "*";
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
+ rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ",
area, sec, usec, level, except_str,
- entry->id.fields.cpuid, (void *)caller);
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c
index 51c5a9f6e525..51c5a9f6e525 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/kernel/numa.c
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index f1fda4375526..10456bc936fb 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock sys_mlock
151 common munlock sys_munlock sys_munlock
152 common mlockall sys_mlockall sys_mlockall
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1608fd99bbee..2f177298c663 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
struct page *page = NULL;
mmap_read_lock(kvm->mm);
- get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
&page, NULL, NULL);
mmap_read_unlock(kvm->mm);
return page;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 66da278a67fb..6b74b92c1a58 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r) {
- r = fixup_user_fault(current, current->mm, hva,
+ r = fixup_user_fault(current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2f721a923b54..cd74989ce0b0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -273,7 +273,7 @@ retry:
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
@@ -319,7 +319,7 @@ retry:
mmap_read_lock(current->mm);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
mmap_read_unlock(current->mm);
@@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
m3 & SSKE_MC);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
@@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index eb382ceaa116..7c988994931f 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -64,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
break;
if (state.reliable && !addr) {
pr_err("unwind state reliable but addr is 0\n");
+ kfree(bt);
return -EINVAL;
}
sprint_symbol(sym, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index aebf9183bedd..4c8c063bce5b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -476,7 +476,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -486,21 +486,7 @@ retry:
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 190357ff86b3..373542ca1113 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -649,7 +649,7 @@ retry:
rc = vmaddr;
goto out_up;
}
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
&unlocked)) {
rc = -EFAULT;
goto out_up;
@@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
BUG_ON(gmap_is_shadow(gmap));
fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
- if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+ if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
return -EFAULT;
if (unlocked)
/* lost mmap_lock, caller has to retry __gmap_translate */
@@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ split_huge_pmd(vma, pmd, addr);
+ return 0;
+}
+
+static const struct mm_walk_ops thp_split_walk_ops = {
+ .pmd_entry = thp_split_walk_pmd_entry,
+};
+
static inline void thp_split_mm(struct mm_struct *mm)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct vm_area_struct *vma;
- unsigned long addr;
for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- for (addr = vma->vm_start;
- addr < vma->vm_end;
- addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
-#endif
}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* Remove all empty zero pages from the mapping for lazy refaulting
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
deleted file mode 100644
index c89d26f4f77d..000000000000
--- a/arch/s390/numa/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-y += numa.o
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 9fc2b010e938..d20927128fce 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,75 +1,77 @@
# SPDX-License-Identifier: GPL-2.0
config SUPERH
def_bool y
+ select ARCH_32BIT_OFF_T
+ select ARCH_HAVE_CUSTOM_GPIO_H
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_BINFMT_FLAT if !MMU
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HIBERNATION_POSSIBLE if MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
- select HAVE_PATA_PLATFORM
+ select ARCH_WANT_IPC_PARSE_VERSION
select CLKDEV_LOOKUP
+ select CPU_NO_EFFICIENT_FFS
select DMA_DECLARE_COHERENT
- select HAVE_IDE if HAS_IOPORT_MAP
- select HAVE_OPROFILE
+ select GENERIC_ATOMIC64
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
+ select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IRQ_SHOW
+ select GENERIC_PCI_IOMAP if PCI
+ select GENERIC_SCHED_CLOCK
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
+ select GENERIC_SMP_IDLE_THREAD
+ select GUP_GET_PTE_LOW_HIGH if X2TLB
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_KGDB
+ select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
- select HAVE_PERF_EVENTS
+ select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_BUGVERBOSE
- select HAVE_FAST_GUP if MMU
- select ARCH_HAVE_CUSTOM_GPIO_H
- select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
- select ARCH_HAS_GCOV_PROFILE_ALL
- select PERF_USE_VMALLOC
select HAVE_DEBUG_KMEMLEAK
- select HAVE_KERNEL_GZIP
- select CPU_NO_EFFICIENT_FFS
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FAST_GUP if MMU
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_HW_BREAKPOINT
+ select HAVE_IDE if HAS_IOPORT_MAP
+ select HAVE_IOREMAP_PROT if MMU && !X2TLB
select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA
- select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_XZ
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
+ select HAVE_MIXED_BREAKPOINTS_REGS
+ select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
+ select HAVE_NMI
+ select HAVE_OPROFILE
+ select HAVE_PATA_PLATFORM
+ select HAVE_PERF_EVENTS
+ select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_UID16
- select ARCH_WANT_IPC_PARSE_VERSION
+ select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_REGS_AND_STACK_ACCESS_API
- select MAY_HAVE_SPARSE_IRQ
select IRQ_FORCED_THREADING
- select RTC_LIB
- select GENERIC_ATOMIC64
- select GENERIC_IRQ_SHOW
- select GENERIC_SMP_IDLE_THREAD
- select GENERIC_IDLE_POLL_SETUP
- select GENERIC_CLOCKEVENTS
- select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
- select GENERIC_PCI_IOMAP if PCI
- select GENERIC_SCHED_CLOCK
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
- select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
+ select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA
+ select NEED_SG_DMA_LENGTH
+ select NO_DMA if !MMU && !DMA_COHERENT
select NO_GENERIC_PCI_IOPORT_MAP if PCI
- select OLD_SIGSUSPEND
select OLD_SIGACTION
+ select OLD_SIGSUSPEND
select PCI_DOMAINS if PCI
- select HAVE_ARCH_AUDITSYSCALL
- select HAVE_FUTEX_CMPXCHG if FUTEX
- select HAVE_NMI
- select NEED_SG_DMA_LENGTH
- select ARCH_HAS_GIGANTIC_PAGE
- select ARCH_32BIT_OFF_T
- select GUP_GET_PTE_LOW_HIGH if X2TLB
- select HAVE_KPROBES
- select HAVE_KRETPROBES
- select HAVE_IOREMAP_PROT if MMU && !X2TLB
- select HAVE_FUNCTION_TRACER
- select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_DYNAMIC_FTRACE
- select ARCH_WANT_IPC_PARSE_VERSION
- select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_ARCH_KGDB
- select HAVE_HW_BREAKPOINT
- select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
- select ARCH_HIBERNATION_POSSIBLE if MMU
+ select PERF_USE_VMALLOC
+ select RTC_LIB
select SPARSE_IRQ
- select HAVE_STACKPROTECTOR
help
The SuperH is a RISC processor targeted for use in embedded systems
and consumer electronics; it was also used in the Sega Dreamcast
@@ -123,8 +125,8 @@ config ARCH_HAS_ILOG2_U64
config NO_IOPORT_MAP
def_bool !PCI
- depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \
- !SH_HP6XX && !SH_SOLUTION_ENGINE
+ depends on !SH_SH4202_MICRODEV && !SH_SHMIN && !SH_HP6XX && \
+ !SH_SOLUTION_ENGINE
config IO_TRAPPED
bool
@@ -136,8 +138,10 @@ config DMA_COHERENT
bool
config DMA_NONCOHERENT
- def_bool !DMA_COHERENT
+ def_bool !NO_DMA && !DMA_COHERENT
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select DMA_DIRECT_REMAP
config PGTABLE_LEVELS
default 3 if X2TLB
@@ -630,7 +634,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below.
See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO
- available at <http://www.tldp.org/docs.html#howto>.
+ available at <https://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
@@ -726,7 +730,6 @@ config ZERO_PAGE_OFFSET
config BOOT_LINK_OFFSET
hex
default "0x00210000" if SH_SHMIN
- default "0x00400000" if SH_CAYMAN
default "0x00810000" if SH_7780_SOLUTION_ENGINE
default "0x009e0000" if SH_TITAN
default "0x01800000" if SH_SDK7780
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index da9cf952f33c..2faebfd72eca 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -15,11 +15,7 @@ ifneq ($(SUBARCH),$(ARCH))
endif
endif
-ifeq ($(ARCH),sh)
KBUILD_DEFCONFIG := shx3_defconfig
-else
-KBUILD_DEFCONFIG := cayman_defconfig
-endif
isa-y := any
isa-$(CONFIG_SH_DSP) := sh
@@ -143,7 +139,6 @@ machdir-$(CONFIG_SH_SH7763RDP) += mach-sh7763rdp
machdir-$(CONFIG_SH_SH4202_MICRODEV) += mach-microdev
machdir-$(CONFIG_SH_LANDISK) += mach-landisk
machdir-$(CONFIG_SH_LBOX_RE2) += mach-lboxre2
-machdir-$(CONFIG_SH_CAYMAN) += mach-cayman
machdir-$(CONFIG_SH_RSK) += mach-rsk
ifneq ($(machdir-y),)
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index fb0ca0c1efe1..83bcb6d2daca 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -340,12 +340,6 @@ config SH_MAGIC_PANEL_R2
help
Select Magic Panel R2 if configuring for Magic Panel R2.
-config SH_CAYMAN
- bool "Hitachi Cayman"
- depends on CPU_SUBTYPE_SH5_101 || CPU_SUBTYPE_SH5_103
- select HAVE_PCI
- select ARCH_MIGHT_HAVE_PC_SERIO
-
config SH_POLARIS
bool "SMSC Polaris"
select CPU_HAS_IPR_IRQ
diff --git a/arch/sh/boards/board-sh2007.c b/arch/sh/boards/board-sh2007.c
index ef9c87deeb08..6ea85e480851 100644
--- a/arch/sh/boards/board-sh2007.c
+++ b/arch/sh/boards/board-sh2007.c
@@ -126,14 +126,14 @@ static void __init sh2007_init_irq(void)
*/
static void __init sh2007_setup(char **cmdline_p)
{
- printk(KERN_INFO "SH-2007 Setup...");
+ pr_info("SH-2007 Setup...");
/* setup wait control registers for area 5 */
__raw_writel(CS5BCR_D, CS5BCR);
__raw_writel(CS5WCR_D, CS5WCR);
__raw_writel(CS5PCR_D, CS5PCR);
- printk(KERN_INFO " done.\n");
+ pr_cont(" done.\n");
}
/*
diff --git a/arch/sh/boards/mach-cayman/Makefile b/arch/sh/boards/mach-cayman/Makefile
deleted file mode 100644
index 775a4be57434..000000000000
--- a/arch/sh/boards/mach-cayman/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Hitachi Cayman specific parts of the kernel
-#
-obj-y := setup.o irq.o panic.o
diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c
deleted file mode 100644
index 0305d0b51730..000000000000
--- a/arch/sh/boards/mach-cayman/irq.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * arch/sh/mach-cayman/irq.c - SH-5 Cayman Interrupt Support
- *
- * This file handles the board specific parts of the Cayman interrupt system
- *
- * Copyright (C) 2002 Stuart Menefy
- */
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/signal.h>
-#include <cpu/irq.h>
-#include <asm/page.h>
-
-/* Setup for the SMSC FDC37C935 / LAN91C100FD */
-#define SMSC_IRQ IRQ_IRL1
-
-/* Setup for PCI Bus 2, which transmits interrupts via the EPLD */
-#define PCI2_IRQ IRQ_IRL3
-
-unsigned long epld_virt;
-
-#define EPLD_BASE 0x04002000
-#define EPLD_STATUS_BASE (epld_virt + 0x10)
-#define EPLD_MASK_BASE (epld_virt + 0x20)
-
-/* Note the SMSC SuperIO chip and SMSC LAN chip interrupts are all muxed onto
- the same SH-5 interrupt */
-
-static irqreturn_t cayman_interrupt_smsc(int irq, void *dev_id)
-{
- printk(KERN_INFO "CAYMAN: spurious SMSC interrupt\n");
- return IRQ_NONE;
-}
-
-static irqreturn_t cayman_interrupt_pci2(int irq, void *dev_id)
-{
- printk(KERN_INFO "CAYMAN: spurious PCI interrupt, IRQ %d\n", irq);
- return IRQ_NONE;
-}
-
-static void enable_cayman_irq(struct irq_data *data)
-{
- unsigned int irq = data->irq;
- unsigned long flags;
- unsigned long mask;
- unsigned int reg;
- unsigned char bit;
-
- irq -= START_EXT_IRQS;
- reg = EPLD_MASK_BASE + ((irq / 8) << 2);
- bit = 1<<(irq % 8);
- local_irq_save(flags);
- mask = __raw_readl(reg);
- mask |= bit;
- __raw_writel(mask, reg);
- local_irq_restore(flags);
-}
-
-static void disable_cayman_irq(struct irq_data *data)
-{
- unsigned int irq = data->irq;
- unsigned long flags;
- unsigned long mask;
- unsigned int reg;
- unsigned char bit;
-
- irq -= START_EXT_IRQS;
- reg = EPLD_MASK_BASE + ((irq / 8) << 2);
- bit = 1<<(irq % 8);
- local_irq_save(flags);
- mask = __raw_readl(reg);
- mask &= ~bit;
- __raw_writel(mask, reg);
- local_irq_restore(flags);
-}
-
-struct irq_chip cayman_irq_type = {
- .name = "Cayman-IRQ",
- .irq_unmask = enable_cayman_irq,
- .irq_mask = disable_cayman_irq,
-};
-
-int cayman_irq_demux(int evt)
-{
- int irq = intc_evt_to_irq[evt];
-
- if (irq == SMSC_IRQ) {
- unsigned long status;
- int i;
-
- status = __raw_readl(EPLD_STATUS_BASE) &
- __raw_readl(EPLD_MASK_BASE) & 0xff;
- if (status == 0) {
- irq = -1;
- } else {
- for (i=0; i<8; i++) {
- if (status & (1<<i))
- break;
- }
- irq = START_EXT_IRQS + i;
- }
- }
-
- if (irq == PCI2_IRQ) {
- unsigned long status;
- int i;
-
- status = __raw_readl(EPLD_STATUS_BASE + 3 * sizeof(u32)) &
- __raw_readl(EPLD_MASK_BASE + 3 * sizeof(u32)) & 0xff;
- if (status == 0) {
- irq = -1;
- } else {
- for (i=0; i<8; i++) {
- if (status & (1<<i))
- break;
- }
- irq = START_EXT_IRQS + (3 * 8) + i;
- }
- }
-
- return irq;
-}
-
-void init_cayman_irq(void)
-{
- int i;
-
- epld_virt = (unsigned long)ioremap(EPLD_BASE, 1024);
- if (!epld_virt) {
- printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n");
- return;
- }
-
- for (i = 0; i < NR_EXT_IRQS; i++) {
- irq_set_chip_and_handler(START_EXT_IRQS + i,
- &cayman_irq_type, handle_level_irq);
- }
-
- /* Setup the SMSC interrupt */
- if (request_irq(SMSC_IRQ, cayman_interrupt_smsc, 0, "Cayman SMSC Mux",
- NULL))
- pr_err("Failed to register Cayman SMSC Mux interrupt\n");
- if (request_irq(PCI2_IRQ, cayman_interrupt_pci2, 0, "Cayman PCI2 Mux",
- NULL))
- pr_err("Failed to register Cayman PCI2 Mux interrupt\n");
-}
diff --git a/arch/sh/boards/mach-cayman/panic.c b/arch/sh/boards/mach-cayman/panic.c
deleted file mode 100644
index cfc46314e7d9..000000000000
--- a/arch/sh/boards/mach-cayman/panic.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2003 Richard Curnow, SuperH UK Limited
- */
-
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <cpu/registers.h>
-
-/* THIS IS A PHYSICAL ADDRESS */
-#define HDSP2534_ADDR (0x04002100)
-
-static void poor_mans_delay(void)
-{
- int i;
-
- for (i = 0; i < 2500000; i++)
- cpu_relax();
-}
-
-static void show_value(unsigned long x)
-{
- int i;
- unsigned nibble;
- for (i = 0; i < 8; i++) {
- nibble = ((x >> (i * 4)) & 0xf);
-
- __raw_writeb(nibble + ((nibble > 9) ? 55 : 48),
- HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
- }
-}
-
-void
-panic_handler(unsigned long panicPC, unsigned long panicSSR,
- unsigned long panicEXPEVT)
-{
- while (1) {
- /* This piece of code displays the PC on the LED display */
- show_value(panicPC);
- poor_mans_delay();
- show_value(panicSSR);
- poor_mans_delay();
- show_value(panicEXPEVT);
- poor_mans_delay();
- }
-}
diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
deleted file mode 100644
index 8ef76e288da0..000000000000
--- a/arch/sh/boards/mach-cayman/setup.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * arch/sh/mach-cayman/setup.c
- *
- * SH5 Cayman support
- *
- * Copyright (C) 2002 David J. Mckay & Benedict Gaster
- * Copyright (C) 2003 - 2007 Paul Mundt
- */
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <cpu/irq.h>
-
-/*
- * Platform Dependent Interrupt Priorities.
- */
-
-/* Using defaults defined in irq.h */
-#define RES NO_PRIORITY /* Disabled */
-#define IR0 IRL0_PRIORITY /* IRLs */
-#define IR1 IRL1_PRIORITY
-#define IR2 IRL2_PRIORITY
-#define IR3 IRL3_PRIORITY
-#define PCA INTA_PRIORITY /* PCI Ints */
-#define PCB INTB_PRIORITY
-#define PCC INTC_PRIORITY
-#define PCD INTD_PRIORITY
-#define SER TOP_PRIORITY
-#define ERR TOP_PRIORITY
-#define PW0 TOP_PRIORITY
-#define PW1 TOP_PRIORITY
-#define PW2 TOP_PRIORITY
-#define PW3 TOP_PRIORITY
-#define DM0 NO_PRIORITY /* DMA Ints */
-#define DM1 NO_PRIORITY
-#define DM2 NO_PRIORITY
-#define DM3 NO_PRIORITY
-#define DAE NO_PRIORITY
-#define TU0 TIMER_PRIORITY /* TMU Ints */
-#define TU1 NO_PRIORITY
-#define TU2 NO_PRIORITY
-#define TI2 NO_PRIORITY
-#define ATI NO_PRIORITY /* RTC Ints */
-#define PRI NO_PRIORITY
-#define CUI RTC_PRIORITY
-#define ERI SCIF_PRIORITY /* SCIF Ints */
-#define RXI SCIF_PRIORITY
-#define BRI SCIF_PRIORITY
-#define TXI SCIF_PRIORITY
-#define ITI TOP_PRIORITY /* WDT Ints */
-
-/* Setup for the SMSC FDC37C935 */
-#define SMSC_SUPERIO_BASE 0x04000000
-#define SMSC_CONFIG_PORT_ADDR 0x3f0
-#define SMSC_INDEX_PORT_ADDR SMSC_CONFIG_PORT_ADDR
-#define SMSC_DATA_PORT_ADDR 0x3f1
-
-#define SMSC_ENTER_CONFIG_KEY 0x55
-#define SMSC_EXIT_CONFIG_KEY 0xaa
-
-#define SMCS_LOGICAL_DEV_INDEX 0x07
-#define SMSC_DEVICE_ID_INDEX 0x20
-#define SMSC_DEVICE_REV_INDEX 0x21
-#define SMSC_ACTIVATE_INDEX 0x30
-#define SMSC_PRIMARY_BASE_INDEX 0x60
-#define SMSC_SECONDARY_BASE_INDEX 0x62
-#define SMSC_PRIMARY_INT_INDEX 0x70
-#define SMSC_SECONDARY_INT_INDEX 0x72
-
-#define SMSC_IDE1_DEVICE 1
-#define SMSC_KEYBOARD_DEVICE 7
-#define SMSC_CONFIG_REGISTERS 8
-
-#define SMSC_SUPERIO_READ_INDEXED(index) ({ \
- outb((index), SMSC_INDEX_PORT_ADDR); \
- inb(SMSC_DATA_PORT_ADDR); })
-#define SMSC_SUPERIO_WRITE_INDEXED(val, index) ({ \
- outb((index), SMSC_INDEX_PORT_ADDR); \
- outb((val), SMSC_DATA_PORT_ADDR); })
-
-#define IDE1_PRIMARY_BASE 0x01f0
-#define IDE1_SECONDARY_BASE 0x03f6
-
-unsigned long smsc_superio_virt;
-
-int platform_int_priority[NR_INTC_IRQS] = {
- IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD, /* IRQ 0- 7 */
- RES, RES, RES, RES, SER, ERR, PW3, PW2, /* IRQ 8-15 */
- PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES, /* IRQ 16-23 */
- RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 24-31 */
- TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI, /* IRQ 32-39 */
- RXI, BRI, TXI, RES, RES, RES, RES, RES, /* IRQ 40-47 */
- RES, RES, RES, RES, RES, RES, RES, RES, /* IRQ 48-55 */
- RES, RES, RES, RES, RES, RES, RES, ITI, /* IRQ 56-63 */
-};
-
-static int __init smsc_superio_setup(void)
-{
- unsigned char devid, devrev;
-
- smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024);
- if (!smsc_superio_virt) {
- panic("Unable to remap SMSC SuperIO\n");
- }
-
- /* Initially the chip is in run state */
- /* Put it into configuration state */
- outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
- outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
-
- /* Read device ID info */
- devid = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_ID_INDEX);
- devrev = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_REV_INDEX);
- printk("SMSC SuperIO devid %02x rev %02x\n", devid, devrev);
-
- /* Select the keyboard device */
- SMSC_SUPERIO_WRITE_INDEXED(SMSC_KEYBOARD_DEVICE, SMCS_LOGICAL_DEV_INDEX);
-
- /* enable it */
- SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX);
-
- /* Select the interrupts */
- /* On a PC keyboard is IRQ1, mouse is IRQ12 */
- SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX);
- SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX);
-
- /*
- * Only IDE1 exists on the Cayman
- */
-
- /* Power it on */
- SMSC_SUPERIO_WRITE_INDEXED(1 << SMSC_IDE1_DEVICE, 0x22);
-
- SMSC_SUPERIO_WRITE_INDEXED(SMSC_IDE1_DEVICE, SMCS_LOGICAL_DEV_INDEX);
- SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX);
-
- SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE >> 8,
- SMSC_PRIMARY_BASE_INDEX + 0);
- SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE & 0xff,
- SMSC_PRIMARY_BASE_INDEX + 1);
-
- SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE >> 8,
- SMSC_SECONDARY_BASE_INDEX + 0);
- SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE & 0xff,
- SMSC_SECONDARY_BASE_INDEX + 1);
-
- SMSC_SUPERIO_WRITE_INDEXED(14, SMSC_PRIMARY_INT_INDEX);
-
- SMSC_SUPERIO_WRITE_INDEXED(SMSC_CONFIG_REGISTERS,
- SMCS_LOGICAL_DEV_INDEX);
-
- SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc2); /* GP42 = nIDE1_OE */
- SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */
- SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */
- SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */
-
- /* Exit the configuration state */
- outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
-
- return 0;
-}
-device_initcall(smsc_superio_setup);
-
-static void __iomem *cayman_ioport_map(unsigned long port, unsigned int len)
-{
- if (port < 0x400) {
- extern unsigned long smsc_superio_virt;
- return (void __iomem *)((port << 2) | smsc_superio_virt);
- }
-
- return (void __iomem *)port;
-}
-
-extern void init_cayman_irq(void);
-
-static struct sh_machine_vector mv_cayman __initmv = {
- .mv_name = "Hitachi Cayman",
- .mv_ioport_map = cayman_ioport_map,
- .mv_init_irq = init_cayman_irq,
-};
diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c
index 16b4d8b0bb85..2c44b94f82fb 100644
--- a/arch/sh/boards/mach-landisk/setup.c
+++ b/arch/sh/boards/mach-landisk/setup.c
@@ -82,6 +82,9 @@ device_initcall(landisk_devices_setup);
static void __init landisk_setup(char **cmdline_p)
{
+ /* I/O port identity mapping */
+ __set_io_port_base(0);
+
/* LED ON */
__raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED);
diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig
deleted file mode 100644
index 911437c163c7..000000000000
--- a/arch/sh/configs/cayman_defconfig
+++ /dev/null
@@ -1,66 +0,0 @@
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_FORCE_MAX_ZONEORDER=11
-CONFIG_MEMORY_START=0x80000000
-CONFIG_MEMORY_SIZE=0x00400000
-CONFIG_FLATMEM_MANUAL=y
-CONFIG_CACHE_OFF=y
-CONFIG_SH_PCLK_FREQ=50000000
-CONFIG_HEARTBEAT=y
-CONFIG_PREEMPT=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-# CONFIG_IPV6 is not set
-# CONFIG_FW_LOADER is not set
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_SPI_ATTRS=y
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_HW_RANDOM=y
-CONFIG_I2C=m
-CONFIG_WATCHDOG=y
-CONFIG_FB=y
-CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_MODE_HELPERS=y
-CONFIG_FB_SH_MOBILE_LCDC=m
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x16=y
-CONFIG_LOGO=y
-# CONFIG_LOGO_LINUX_MONO is not set
-# CONFIG_LOGO_LINUX_VGA16 is not set
-# CONFIG_LOGO_LINUX_CLUT224 is not set
-# CONFIG_LOGO_SUPERH_MONO is not set
-# CONFIG_LOGO_SUPERH_VGA16 is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_MINIX_FS=y
-CONFIG_ROMFS_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_SCHEDSTATS=y
-CONFIG_FRAME_POINTER=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index ae067e0b15e3..6a82c7b8ff32 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index a5b865a75d22..9a988c347e9d 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index a92db6694ce2..70e6605d7f7e 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7709=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 567af752b1bb..ba6ec042606f 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,6 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 10f6d371ce2c..05e4ac6fed5f 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,6 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index ed84d1303acf..c65667d00313 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH4_202=y
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index 37e9521a99e5..a24cf8cd2cea 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index c97ec60cff27..e922659fdadb 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
CONFIG_SLAB=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 55fce65eb454..5978866358ec 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index 6a3cfe08295f..fc9c22152b08 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 2b3d7d280672..ff3fd6787fd6 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,7 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 21a43f14ffac..ff5bb4489922 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -18,7 +18,6 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index 4e794e719a28..5d6c19338ebf 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -2,7 +2,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 3264415a5931..71a672c30716 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,7 +1,6 @@
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_ELF_CORE is not set
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 4496b94b7d88..ed00a6eeadf5 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -2,7 +2,6 @@
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index b23f67542728..3f1c13799d79 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 162343683937..4a024065bb75 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig
index 360592d63a2f..8422599cfb04 100644
--- a/arch/sh/configs/secureedge5410_defconfig
+++ b/arch/sh/configs/secureedge5410_defconfig
@@ -1,7 +1,6 @@
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_HOTPLUG is not set
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 87db9a84b5ec..f0073ed39947 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index 08426913c0e3..0d814770b07f 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -2,7 +2,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index d0933a9b9799..a2700ab165af 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS_ALL=y
CONFIG_SLAB=y
CONFIG_MODULES=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index d0a0aa74cecf..26c5fd02c87a 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index a27b129b93c5..c0b6f40d01cc 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,7 +1,6 @@
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_BUG is not set
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 4ec961ace688..ba887f1351be 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=16
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile
index a5c1e9066f83..d313fd3ce709 100644
--- a/arch/sh/drivers/pci/Makefile
+++ b/arch/sh/drivers/pci/Makefile
@@ -25,4 +25,3 @@ obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o
obj-$(CONFIG_SH_TITAN) += fixups-titan.o
obj-$(CONFIG_SH_LANDISK) += fixups-landisk.o
obj-$(CONFIG_SH_LBOX_RE2) += fixups-rts7751r2d.o
-obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o
diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c
index fe163ecd0719..2fd2b77e12ce 100644
--- a/arch/sh/drivers/pci/common.c
+++ b/arch/sh/drivers/pci/common.c
@@ -54,7 +54,7 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose,
int cap66 = -1;
u16 stat;
- printk(KERN_INFO "PCI: Checking 66MHz capabilities...\n");
+ pr_info("PCI: Checking 66MHz capabilities...\n");
for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) {
if (PCI_FUNC(pci_devfn))
@@ -134,7 +134,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr,
pcibios_report_status(PCI_STATUS_REC_TARGET_ABORT |
PCI_STATUS_SIG_TARGET_ABORT |
PCI_STATUS_REC_MASTER_ABORT, 1);
- printk("\n");
+ pr_cont("\n");
cmd |= PCI_STATUS_REC_TARGET_ABORT;
}
@@ -143,7 +143,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr,
printk(KERN_DEBUG "PCI: parity error detected: ");
pcibios_report_status(PCI_STATUS_PARITY |
PCI_STATUS_DETECTED_PARITY, 1);
- printk("\n");
+ pr_cont("\n");
cmd |= PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY;
diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c
deleted file mode 100644
index c797bfbe2e98..000000000000
--- a/arch/sh/drivers/pci/fixups-cayman.c
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/types.h>
-#include <cpu/irq.h>
-#include "pci-sh5.h"
-
-int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- int result = -1;
-
- /* The complication here is that the PCI IRQ lines from the Cayman's 2
- 5V slots get into the CPU via a different path from the IRQ lines
- from the 3 3.3V slots. Thus, we have to detect whether the card's
- interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling'
- at the point where we cross from 5V to 3.3V is not the normal case.
-
- The added complication is that we don't know that the 5V slots are
- always bus 2, because a card containing a PCI-PCI bridge may be
- plugged into a 3.3V slot, and this changes the bus numbering.
-
- Also, the Cayman has an intermediate PCI bus that goes a custom
- expansion board header (and to the secondary bridge). This bus has
- never been used in practice.
-
- The 1ary onboard PCI-PCI bridge is device 3 on bus 0
- The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of
- the 1ary bridge.
- */
-
- struct slot_pin {
- int slot;
- int pin;
- } path[4];
- int i=0;
-
- while (dev->bus->number > 0) {
-
- slot = path[i].slot = PCI_SLOT(dev->devfn);
- pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin);
- dev = dev->bus->self;
- i++;
- if (i > 3) panic("PCI path to root bus too long!\n");
- }
-
- slot = PCI_SLOT(dev->devfn);
- /* This is the slot on bus 0 through which the device is eventually
- reachable. */
-
- /* Now work back up. */
- if ((slot < 3) || (i == 0)) {
- /* Bus 0 (incl. PCI-PCI bridge itself) : perform the final
- swizzle now. */
- result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1;
- } else {
- i--;
- slot = path[i].slot;
- pin = path[i].pin;
- if (slot > 0) {
- panic("PCI expansion bus device found - not handled!\n");
- } else {
- if (i > 0) {
- /* 5V slots */
- i--;
- slot = path[i].slot;
- pin = path[i].pin;
- /* 'pin' was swizzled earlier wrt slot, don't do it again. */
- result = IRQ_P2INTA + (pin - 1);
- } else {
- /* IRQ for 2ary PCI-PCI bridge : unused */
- result = -1;
- }
- }
- }
-
- return result;
-}
diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c
index 287b3a68570c..9a624a6ee354 100644
--- a/arch/sh/drivers/pci/pci-sh7780.c
+++ b/arch/sh/drivers/pci/pci-sh7780.c
@@ -148,7 +148,7 @@ static irqreturn_t sh7780_pci_serr_irq(int irq, void *dev_id)
printk(KERN_DEBUG "PCI: system error received: ");
pcibios_report_status(PCI_STATUS_SIG_SYSTEM_ERROR, 1);
- printk("\n");
+ pr_cont("\n");
/* Deassert SERR */
__raw_writel(SH4_PCIINTM_SDIM, hose->reg_base + SH4_PCIINTM);
@@ -179,7 +179,7 @@ static int __init sh7780_pci_setup_irqs(struct pci_channel *hose)
ret = request_irq(hose->serr_irq, sh7780_pci_serr_irq, 0,
"PCI SERR interrupt", hose);
if (unlikely(ret)) {
- printk(KERN_ERR "PCI: Failed hooking SERR IRQ\n");
+ pr_err("PCI: Failed hooking SERR IRQ\n");
return ret;
}
@@ -250,7 +250,7 @@ static int __init sh7780_pci_init(void)
const char *type;
int ret, i;
- printk(KERN_NOTICE "PCI: Starting initialization.\n");
+ pr_notice("PCI: Starting initialization.\n");
chan->reg_base = 0xfe040000;
@@ -270,7 +270,7 @@ static int __init sh7780_pci_init(void)
id = __raw_readw(chan->reg_base + PCI_VENDOR_ID);
if (id != PCI_VENDOR_ID_RENESAS) {
- printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id);
+ pr_err("PCI: Unknown vendor ID 0x%04x.\n", id);
return -ENODEV;
}
@@ -281,14 +281,13 @@ static int __init sh7780_pci_init(void)
(id == PCI_DEVICE_ID_RENESAS_SH7785) ? "SH7785" :
NULL;
if (unlikely(!type)) {
- printk(KERN_ERR "PCI: Found an unsupported Renesas host "
- "controller, device id 0x%04x.\n", id);
+ pr_err("PCI: Found an unsupported Renesas host controller, device id 0x%04x.\n",
+ id);
return -EINVAL;
}
- printk(KERN_NOTICE "PCI: Found a Renesas %s host "
- "controller, revision %d.\n", type,
- __raw_readb(chan->reg_base + PCI_REVISION_ID));
+ pr_notice("PCI: Found a Renesas %s host controller, revision %d.\n",
+ type, __raw_readb(chan->reg_base + PCI_REVISION_ID));
/*
* Now throw it in to register initialization mode and
@@ -395,9 +394,9 @@ static int __init sh7780_pci_init(void)
sh7780_pci66_init(chan);
- printk(KERN_NOTICE "PCI: Running at %dMHz.\n",
- (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ) ?
- 66 : 33);
+ pr_notice("PCI: Running at %dMHz.\n",
+ (__raw_readw(chan->reg_base + PCI_STATUS) & PCI_STATUS_66MHZ)
+ ? 66 : 33);
return 0;
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c7784e156964..6ab0b7377f66 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -120,8 +120,7 @@ int register_pci_controller(struct pci_channel *hose)
* Do not panic here but later - this might happen before console init.
*/
if (!hose->io_map_base) {
- printk(KERN_WARNING
- "registering PCI controller with io_map_base unset\n");
+ pr_warn("registering PCI controller with io_map_base unset\n");
}
/*
@@ -145,7 +144,7 @@ out:
for (--i; i >= 0; i--)
release_resource(&hose->resources[i]);
- printk(KERN_WARNING "Skipping PCI bus scan due to resource conflict\n");
+ pr_warn("Skipping PCI bus scan due to resource conflict\n");
return -1;
}
@@ -213,8 +212,8 @@ pcibios_bus_report_status_early(struct pci_channel *hose,
pci_devfn, PCI_STATUS,
status & status_mask);
if (warn)
- printk("(%02x:%02x: %04X) ", current_bus,
- pci_devfn, status);
+ pr_cont("(%02x:%02x: %04X) ", current_bus, pci_devfn,
+ status);
}
}
@@ -249,7 +248,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask,
pci_write_config_word(dev, PCI_STATUS, status & status_mask);
if (warn)
- printk("(%s: %04X) ", pci_name(dev), status);
+ pr_cont("(%s: %04X) ", pci_name(dev), status);
}
list_for_each_entry(dev, &bus->devices, bus_list)
diff --git a/arch/sh/include/asm/adc.h b/arch/sh/include/asm/adc.h
index 99ec66849559..feccfe639e38 100644
--- a/arch/sh/include/asm/adc.h
+++ b/arch/sh/include/asm/adc.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_ADC_H
#define __ASM_ADC_H
-#ifdef __KERNEL__
/*
* Copyright (C) 2004 Andriy Skulysh
*/
@@ -10,5 +9,4 @@
int adc_single(unsigned int channel);
-#endif /* __KERNEL__ */
#endif /* __ASM_ADC_H */
diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h
index 34bfbcddcce0..468fba333e89 100644
--- a/arch/sh/include/asm/addrspace.h
+++ b/arch/sh/include/asm/addrspace.h
@@ -7,8 +7,6 @@
#ifndef __ASM_SH_ADDRSPACE_H
#define __ASM_SH_ADDRSPACE_H
-#ifdef __KERNEL__
-
#include <cpu/addrspace.h>
/* If this CPU supports segmentation, hook up the helpers */
@@ -62,5 +60,4 @@
#define P3_ADDR_MAX P4SEG
#endif
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_ADDRSPACE_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index 445dd14c448a..450b5854d7c6 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_BITOPS_H
#define __ASM_SH_BITOPS_H
-#ifdef __KERNEL__
-
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif
@@ -71,6 +69,4 @@ static inline unsigned long __ffs(unsigned long word)
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#endif /* __KERNEL__ */
-
#endif /* __ASM_SH_BITOPS_H */
diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h
index 2408ac4873aa..a293343456af 100644
--- a/arch/sh/include/asm/cache.h
+++ b/arch/sh/include/asm/cache.h
@@ -8,7 +8,6 @@
*/
#ifndef __ASM_SH_CACHE_H
#define __ASM_SH_CACHE_H
-#ifdef __KERNEL__
#include <linux/init.h>
#include <cpu/cache.h>
@@ -44,5 +43,4 @@ struct cache_info {
unsigned long flags;
};
#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_CACHE_H */
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index fe7400079b97..4486a865ff62 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_CACHEFLUSH_H
#define __ASM_SH_CACHEFLUSH_H
-#ifdef __KERNEL__
-
#include <linux/mm.h>
/*
@@ -109,5 +107,4 @@ static inline void *sh_cacheop_vaddr(void *vaddr)
return vaddr;
}
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_CACHEFLUSH_H */
diff --git a/arch/sh/include/asm/dma.h b/arch/sh/include/asm/dma.h
index 4d5a21a891c0..17d23ae98c77 100644
--- a/arch/sh/include/asm/dma.h
+++ b/arch/sh/include/asm/dma.h
@@ -6,7 +6,6 @@
*/
#ifndef __ASM_SH_DMA_H
#define __ASM_SH_DMA_H
-#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/wait.h>
@@ -144,5 +143,4 @@ extern int isa_dma_bridge_buggy;
#define isa_dma_bridge_buggy (0)
#endif
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_DMA_H */
diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h
index 7661fb5d548a..2862d6d1cb64 100644
--- a/arch/sh/include/asm/elf.h
+++ b/arch/sh/include/asm/elf.h
@@ -90,7 +90,6 @@ typedef struct user_fpu_struct elf_fpregset_t;
#endif
#define ELF_ARCH EM_SH
-#ifdef __KERNEL__
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
@@ -209,5 +208,4 @@ do { \
NEW_AUX_ENT(AT_L2_CACHESHAPE, l2_cache_shape); \
} while (0)
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_ELF_H */
diff --git a/arch/sh/include/asm/freq.h b/arch/sh/include/asm/freq.h
index 18133bf83738..87c23621b7ae 100644
--- a/arch/sh/include/asm/freq.h
+++ b/arch/sh/include/asm/freq.h
@@ -6,9 +6,7 @@
*/
#ifndef __ASM_SH_FREQ_H
#define __ASM_SH_FREQ_H
-#ifdef __KERNEL__
#include <cpu/freq.h>
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_FREQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index b39cda09fb95..b70f3fce6ed7 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_FUTEX_H
#define __ASM_SH_FUTEX_H
-#ifdef __KERNEL__
-
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/errno.h>
@@ -71,5 +69,4 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
return ret;
}
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_FUTEX_H */
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index ec587b583822..6d5c6463bc07 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -21,10 +21,8 @@
#include <linux/pgtable.h>
#include <asm-generic/iomap.h>
-#ifdef __KERNEL__
#define __IO_PREFIX generic
#include <asm/io_generic.h>
-#include <asm/io_trapped.h>
#include <asm-generic/pci_iomap.h>
#include <mach/mangle-port.h>
@@ -244,125 +242,38 @@ unsigned long long poke_real_address_q(unsigned long long addr,
#define phys_to_virt(address) (__va(address))
#endif
-/*
- * On 32-bit SH, we traditionally have the whole physical address space
- * mapped at all times (as MIPS does), so "ioremap()" and "iounmap()" do
- * not need to do anything but place the address in the proper segment.
- * This is true for P1 and P2 addresses, as well as some P3 ones.
- * However, most of the P3 addresses and newer cores using extended
- * addressing need to map through page tables, so the ioremap()
- * implementation becomes a bit more complicated.
- *
- * See arch/sh/mm/ioremap.c for additional notes on this.
- *
- * We cheat a bit and always return uncachable areas until we've fixed
- * the drivers to handle caching properly.
- *
- * On the SH-5 the concept of segmentation in the 1:1 PXSEG sense simply
- * doesn't exist, so everything must go through page tables.
- */
#ifdef CONFIG_MMU
+void iounmap(void __iomem *addr);
void __iomem *__ioremap_caller(phys_addr_t offset, unsigned long size,
pgprot_t prot, void *caller);
-void iounmap(void __iomem *addr);
-
-static inline void __iomem *
-__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot)
-{
- return __ioremap_caller(offset, size, prot, __builtin_return_address(0));
-}
-
-static inline void __iomem *
-__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot)
-{
-#ifdef CONFIG_29BIT
- phys_addr_t last_addr = offset + size - 1;
-
- /*
- * For P1 and P2 space this is trivial, as everything is already
- * mapped. Uncached access for P1 addresses are done through P2.
- * In the P3 case or for addresses outside of the 29-bit space,
- * mapping must be done by the PMB or by using page tables.
- */
- if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) {
- u64 flags = pgprot_val(prot);
-
- /*
- * Anything using the legacy PTEA space attributes needs
- * to be kicked down to page table mappings.
- */
- if (unlikely(flags & _PAGE_PCC_MASK))
- return NULL;
- if (unlikely(flags & _PAGE_CACHABLE))
- return (void __iomem *)P1SEGADDR(offset);
-
- return (void __iomem *)P2SEGADDR(offset);
- }
-
- /* P4 above the store queues are always mapped. */
- if (unlikely(offset >= P3_ADDR_MAX))
- return (void __iomem *)P4SEGADDR(offset);
-#endif
-
- return NULL;
-}
-
-static inline void __iomem *
-__ioremap_mode(phys_addr_t offset, unsigned long size, pgprot_t prot)
-{
- void __iomem *ret;
-
- ret = __ioremap_trapped(offset, size);
- if (ret)
- return ret;
-
- ret = __ioremap_29bit(offset, size, prot);
- if (ret)
- return ret;
-
- return __ioremap(offset, size, prot);
-}
-#else
-#define __ioremap(offset, size, prot) ((void __iomem *)(offset))
-#define __ioremap_mode(offset, size, prot) ((void __iomem *)(offset))
-static inline void iounmap(void __iomem *addr) {}
-#endif /* CONFIG_MMU */
static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
{
- return __ioremap_mode(offset, size, PAGE_KERNEL_NOCACHE);
+ return __ioremap_caller(offset, size, PAGE_KERNEL_NOCACHE,
+ __builtin_return_address(0));
}
static inline void __iomem *
ioremap_cache(phys_addr_t offset, unsigned long size)
{
- return __ioremap_mode(offset, size, PAGE_KERNEL);
+ return __ioremap_caller(offset, size, PAGE_KERNEL,
+ __builtin_return_address(0));
}
#define ioremap_cache ioremap_cache
#ifdef CONFIG_HAVE_IOREMAP_PROT
-static inline void __iomem *
-ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long flags)
+static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
+ unsigned long flags)
{
- return __ioremap_mode(offset, size, __pgprot(flags));
+ return __ioremap_caller(offset, size, __pgprot(flags),
+ __builtin_return_address(0));
}
-#endif
+#endif /* CONFIG_HAVE_IOREMAP_PROT */
-#ifdef CONFIG_IOREMAP_FIXED
-extern void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t);
-extern int iounmap_fixed(void __iomem *);
-extern void ioremap_fixed_init(void);
-#else
-static inline void __iomem *
-ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
-{
- BUG();
- return NULL;
-}
-
-static inline void ioremap_fixed_init(void) { }
-static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
-#endif
+#else /* CONFIG_MMU */
+#define iounmap(addr) do { } while (0)
+#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset))
+#endif /* CONFIG_MMU */
#define ioremap_uc ioremap
@@ -381,6 +292,4 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
int valid_phys_addr_range(phys_addr_t addr, size_t size);
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-#endif /* __KERNEL__ */
-
#endif /* __ASM_SH_IO_H */
diff --git a/arch/sh/include/asm/kdebug.h b/arch/sh/include/asm/kdebug.h
index 960545306afa..de8693fabb1d 100644
--- a/arch/sh/include/asm/kdebug.h
+++ b/arch/sh/include/asm/kdebug.h
@@ -12,8 +12,7 @@ enum die_val {
};
/* arch/sh/kernel/dumpstack.c */
-extern void printk_address(unsigned long address, int reliable,
- const char *loglvl);
+extern void printk_address(unsigned long address, int reliable);
extern void dump_mem(const char *str, const char *loglvl,
unsigned long bottom, unsigned long top);
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 48e67d544d53..f664e51e8a15 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -8,7 +8,6 @@
#ifndef __ASM_SH_MMU_CONTEXT_H
#define __ASM_SH_MMU_CONTEXT_H
-#ifdef __KERNEL__
#include <cpu/mmu_context.h>
#include <asm/tlbflush.h>
#include <linux/uaccess.h>
@@ -177,5 +176,4 @@ static inline void disable_mmu(void)
#define disable_mmu() do { } while (0)
#endif
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_MMU_CONTEXT_H */
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index cbaee1d1b673..6552a088dc97 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_MMZONE_H
#define __ASM_SH_MMZONE_H
-#ifdef __KERNEL__
-
#ifdef CONFIG_NEED_MULTIPLE_NODES
#include <linux/numa.h>
@@ -44,5 +42,4 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
/* arch/sh/mm/init.c */
void __init allocate_pgdat(unsigned int nid);
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_MMZONE_H */
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 10a36b1cf2ea..ad22e88c6657 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_PCI_H
#define __ASM_SH_PCI_H
-#ifdef __KERNEL__
-
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */
@@ -96,6 +94,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
return channel ? 15 : 14;
}
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_PCI_H */
-
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index d44409413418..aa92cc933889 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -8,7 +8,6 @@
#ifndef __ASM_SH_PROCESSOR_32_H
#define __ASM_SH_PROCESSOR_32_H
-#ifdef __KERNEL__
#include <linux/compiler.h>
#include <linux/linkage.h>
@@ -203,5 +202,4 @@ static inline void prefetchw(const void *x)
}
#endif
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_PROCESSOR_32_H */
diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h
index 33d1d28057cb..02e54a3335d6 100644
--- a/arch/sh/include/asm/segment.h
+++ b/arch/sh/include/asm/segment.h
@@ -24,8 +24,7 @@ typedef struct {
#define USER_DS KERNEL_DS
#endif
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
diff --git a/arch/sh/include/asm/smc37c93x.h b/arch/sh/include/asm/smc37c93x.h
index f054c30a171a..891f2f8f2fd0 100644
--- a/arch/sh/include/asm/smc37c93x.h
+++ b/arch/sh/include/asm/smc37c93x.h
@@ -112,8 +112,8 @@ typedef struct uart_reg {
#define FCR_RFRES 0x0200 /* Receiver FIFO reset */
#define FCR_TFRES 0x0400 /* Transmitter FIFO reset */
#define FCR_DMA 0x0800 /* DMA mode select */
-#define FCR_RTL 0x4000 /* Receiver triger (LSB) */
-#define FCR_RTM 0x8000 /* Receiver triger (MSB) */
+#define FCR_RTL 0x4000 /* Receiver trigger (LSB) */
+#define FCR_RTM 0x8000 /* Receiver trigger (MSB) */
/* Line Control Register */
diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h
index 4eb899751e45..4703cbe23844 100644
--- a/arch/sh/include/asm/sparsemem.h
+++ b/arch/sh/include/asm/sparsemem.h
@@ -2,16 +2,11 @@
#ifndef __ASM_SH_SPARSEMEM_H
#define __ASM_SH_SPARSEMEM_H
-#ifdef __KERNEL__
/*
* SECTION_SIZE_BITS 2^N: how big each section will be
- * MAX_PHYSADDR_BITS 2^N: how much physical address space we have
- * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
+ * MAX_PHYSMEM_BITS 2^N: how much physical address space we have
*/
#define SECTION_SIZE_BITS 26
-#define MAX_PHYSADDR_BITS 32
#define MAX_PHYSMEM_BITS 32
-#endif
-
#endif /* __ASM_SH_SPARSEMEM_H */
diff --git a/arch/sh/include/asm/stacktrace.h b/arch/sh/include/asm/stacktrace.h
index 50c173c0b9f5..4f98cdc64ec5 100644
--- a/arch/sh/include/asm/stacktrace.h
+++ b/arch/sh/include/asm/stacktrace.h
@@ -12,8 +12,6 @@
struct stacktrace_ops {
void (*address)(void *data, unsigned long address, int reliable);
- /* On negative return stop dumping */
- int (*stack)(void *data, char *name);
};
void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
diff --git a/arch/sh/include/asm/string_32.h b/arch/sh/include/asm/string_32.h
index 3558b1d7123e..a276b193d3b4 100644
--- a/arch/sh/include/asm/string_32.h
+++ b/arch/sh/include/asm/string_32.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_STRING_H
#define __ASM_SH_STRING_H
-#ifdef __KERNEL__
-
/*
* Copyright (C) 1999 Niibe Yutaka
* But consider these trivial functions to be public domain.
@@ -28,32 +26,6 @@ static inline char *strcpy(char *__dest, const char *__src)
return __xdest;
}
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *__dest, const char *__src, size_t __n)
-{
- register char *__xdest = __dest;
- unsigned long __dummy;
-
- if (__n == 0)
- return __xdest;
-
- __asm__ __volatile__(
- "1:\n"
- "mov.b @%1+, %2\n\t"
- "mov.b %2, @%0\n\t"
- "cmp/eq #0, %2\n\t"
- "bt/s 2f\n\t"
- " cmp/eq %5,%1\n\t"
- "bf/s 1b\n\t"
- " add #1, %0\n"
- "2:"
- : "=r" (__dest), "=r" (__src), "=&z" (__dummy)
- : "0" (__dest), "1" (__src), "r" (__src+__n)
- : "memory", "t");
-
- return __xdest;
-}
-
#define __HAVE_ARCH_STRCMP
static inline int strcmp(const char *__cs, const char *__ct)
{
@@ -127,6 +99,4 @@ extern void *memchr(const void *__s, int __c, size_t __n);
#define __HAVE_ARCH_STRLEN
extern size_t strlen(const char *);
-#endif /* __KERNEL__ */
-
#endif /* __ASM_SH_STRING_H */
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 0b5b8e75edac..cb51a7528384 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -40,10 +40,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- if (error)
- regs->regs[0] = -error;
- else
- regs->regs[0] = val;
+ regs->regs[0] = (long) error ?: val;
}
static inline void syscall_get_arguments(struct task_struct *task,
diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h
index 9f9faf63b48c..5c555b864fe0 100644
--- a/arch/sh/include/asm/syscalls_32.h
+++ b/arch/sh/include/asm/syscalls_32.h
@@ -2,8 +2,6 @@
#ifndef __ASM_SH_SYSCALLS_32_H
#define __ASM_SH_SYSCALLS_32_H
-#ifdef __KERNEL__
-
#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -26,5 +24,4 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
unsigned long thread_info_flags);
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_SYSCALLS_32_H */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 6404be69d5fa..243ea5150aa0 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -10,8 +10,6 @@
* Copyright (C) 2002 David Howells (dhowells@redhat.com)
* - Incorporating suggestions made by Linus Torvalds and Dave Miller
*/
-#ifdef __KERNEL__
-
#include <asm/page.h>
/*
@@ -170,7 +168,4 @@ static inline unsigned int get_thread_fault_code(void)
}
#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
#endif /* __ASM_SH_THREAD_INFO_H */
diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h
index 624cf55acc27..5d7ddc092afd 100644
--- a/arch/sh/include/asm/uaccess_32.h
+++ b/arch/sh/include/asm/uaccess_32.h
@@ -26,6 +26,9 @@ do { \
case 4: \
__get_user_asm(x, ptr, retval, "l"); \
break; \
+ case 8: \
+ __get_user_u64(x, ptr, retval); \
+ break; \
default: \
__get_user_unknown(); \
break; \
@@ -66,6 +69,56 @@ do { \
extern void __get_user_unknown(void);
+#if defined(CONFIG_CPU_LITTLE_ENDIAN)
+#define __get_user_u64(x, addr, err) \
+({ \
+__asm__ __volatile__( \
+ "1:\n\t" \
+ "mov.l %2,%R1\n\t" \
+ "mov.l %T2,%S1\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\n\t" \
+ "mov #0,%R1\n\t" \
+ "mov #0,%S1\n\t" \
+ "mov.l 4f, %0\n\t" \
+ "jmp @%0\n\t" \
+ " mov %3, %0\n\t" \
+ ".balign 4\n" \
+ "4: .long 2b\n\t" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".long 1b, 3b\n\t" \
+ ".long 1b + 2, 3b\n\t" \
+ ".previous" \
+ :"=&r" (err), "=&r" (x) \
+ :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })
+#else
+#define __get_user_u64(x, addr, err) \
+({ \
+__asm__ __volatile__( \
+ "1:\n\t" \
+ "mov.l %2,%S1\n\t" \
+ "mov.l %T2,%R1\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\n\t" \
+ "mov #0,%S1\n\t" \
+ "mov #0,%R1\n\t" \
+ "mov.l 4f, %0\n\t" \
+ "jmp @%0\n\t" \
+ " mov %3, %0\n\t" \
+ ".balign 4\n" \
+ "4: .long 2b\n\t" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".long 1b, 3b\n\t" \
+ ".long 1b + 2, 3b\n\t" \
+ ".previous" \
+ :"=&r" (err), "=&r" (x) \
+ :"m" (__m(addr)), "i" (-EFAULT), "0" (err)); })
+#endif
+
#define __put_user_size(x,ptr,size,retval) \
do { \
retval = 0; \
diff --git a/arch/sh/include/asm/watchdog.h b/arch/sh/include/asm/watchdog.h
index cecd0fc507f9..b9ca4c99f046 100644
--- a/arch/sh/include/asm/watchdog.h
+++ b/arch/sh/include/asm/watchdog.h
@@ -8,7 +8,6 @@
*/
#ifndef __ASM_SH_WATCHDOG_H
#define __ASM_SH_WATCHDOG_H
-#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/io.h>
@@ -157,5 +156,4 @@ static inline void sh_wdt_write_csr(__u8 val)
__raw_writew((WTCSR_HIGH << 8) | (__u16)val, WTCSR);
}
#endif /* CONFIG_CPU_SUBTYPE_SH7785 || CONFIG_CPU_SUBTYPE_SH7780 */
-#endif /* __KERNEL__ */
#endif /* __ASM_SH_WATCHDOG_H */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index b0f5574b6228..aa0fbc9202b1 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -47,5 +47,3 @@ obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o
obj-$(CONFIG_DMA_NONCOHERENT) += dma-coherent.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
-
-ccflags-y := -Werror
diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
index 845543780cc5..08e1af63edd9 100644
--- a/arch/sh/kernel/disassemble.c
+++ b/arch/sh/kernel/disassemble.c
@@ -376,148 +376,148 @@ static void print_sh_insn(u32 memaddr, u16 insn)
}
ok:
- printk("%-8s ", op->name);
+ pr_cont("%-8s ", op->name);
lastsp = (op->arg[0] == A_END);
disp_pc = 0;
for (n = 0; n < 6 && op->arg[n] != A_END; n++) {
if (n && op->arg[1] != A_END)
- printk(", ");
+ pr_cont(", ");
switch (op->arg[n]) {
case A_IMM:
- printk("#%d", (char)(imm));
+ pr_cont("#%d", (char)(imm));
break;
case A_R0:
- printk("r0");
+ pr_cont("r0");
break;
case A_REG_N:
- printk("r%d", rn);
+ pr_cont("r%d", rn);
break;
case A_INC_N:
- printk("@r%d+", rn);
+ pr_cont("@r%d+", rn);
break;
case A_DEC_N:
- printk("@-r%d", rn);
+ pr_cont("@-r%d", rn);
break;
case A_IND_N:
- printk("@r%d", rn);
+ pr_cont("@r%d", rn);
break;
case A_DISP_REG_N:
- printk("@(%d,r%d)", imm, rn);
+ pr_cont("@(%d,r%d)", imm, rn);
break;
case A_REG_M:
- printk("r%d", rm);
+ pr_cont("r%d", rm);
break;
case A_INC_M:
- printk("@r%d+", rm);
+ pr_cont("@r%d+", rm);
break;
case A_DEC_M:
- printk("@-r%d", rm);
+ pr_cont("@-r%d", rm);
break;
case A_IND_M:
- printk("@r%d", rm);
+ pr_cont("@r%d", rm);
break;
case A_DISP_REG_M:
- printk("@(%d,r%d)", imm, rm);
+ pr_cont("@(%d,r%d)", imm, rm);
break;
case A_REG_B:
- printk("r%d_bank", rb);
+ pr_cont("r%d_bank", rb);
break;
case A_DISP_PC:
disp_pc = 1;
disp_pc_addr = imm + 4 + (memaddr & relmask);
- printk("%08x <%pS>", disp_pc_addr,
- (void *)disp_pc_addr);
+ pr_cont("%08x <%pS>", disp_pc_addr,
+ (void *)disp_pc_addr);
break;
case A_IND_R0_REG_N:
- printk("@(r0,r%d)", rn);
+ pr_cont("@(r0,r%d)", rn);
break;
case A_IND_R0_REG_M:
- printk("@(r0,r%d)", rm);
+ pr_cont("@(r0,r%d)", rm);
break;
case A_DISP_GBR:
- printk("@(%d,gbr)",imm);
+ pr_cont("@(%d,gbr)", imm);
break;
case A_R0_GBR:
- printk("@(r0,gbr)");
+ pr_cont("@(r0,gbr)");
break;
case A_BDISP12:
case A_BDISP8:
- printk("%08x", imm + memaddr);
+ pr_cont("%08x", imm + memaddr);
break;
case A_SR:
- printk("sr");
+ pr_cont("sr");
break;
case A_GBR:
- printk("gbr");
+ pr_cont("gbr");
break;
case A_VBR:
- printk("vbr");
+ pr_cont("vbr");
break;
case A_SSR:
- printk("ssr");
+ pr_cont("ssr");
break;
case A_SPC:
- printk("spc");
+ pr_cont("spc");
break;
case A_MACH:
- printk("mach");
+ pr_cont("mach");
break;
case A_MACL:
- printk("macl");
+ pr_cont("macl");
break;
case A_PR:
- printk("pr");
+ pr_cont("pr");
break;
case A_SGR:
- printk("sgr");
+ pr_cont("sgr");
break;
case A_DBR:
- printk("dbr");
+ pr_cont("dbr");
break;
case FD_REG_N:
case F_REG_N:
- printk("fr%d", rn);
+ pr_cont("fr%d", rn);
break;
case F_REG_M:
- printk("fr%d", rm);
+ pr_cont("fr%d", rm);
break;
case DX_REG_N:
if (rn & 1) {
- printk("xd%d", rn & ~1);
+ pr_cont("xd%d", rn & ~1);
break;
}
/* else, fall through */
case D_REG_N:
- printk("dr%d", rn);
+ pr_cont("dr%d", rn);
break;
case DX_REG_M:
if (rm & 1) {
- printk("xd%d", rm & ~1);
+ pr_cont("xd%d", rm & ~1);
break;
}
/* else, fall through */
case D_REG_M:
- printk("dr%d", rm);
+ pr_cont("dr%d", rm);
break;
case FPSCR_M:
case FPSCR_N:
- printk("fpscr");
+ pr_cont("fpscr");
break;
case FPUL_M:
case FPUL_N:
- printk("fpul");
+ pr_cont("fpul");
break;
case F_FR0:
- printk("fr0");
+ pr_cont("fr0");
break;
case V_REG_N:
- printk("fv%d", rn*4);
+ pr_cont("fv%d", rn*4);
break;
case V_REG_M:
- printk("fv%d", rm*4);
+ pr_cont("fv%d", rm*4);
break;
case XMTRX_M4:
- printk("xmtrx");
+ pr_cont("xmtrx");
break;
default:
return;
@@ -532,7 +532,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
else
__get_user(val, (u32 *)disp_pc_addr);
- printk(" ! %08x <%pS>", val, (void *)val);
+ pr_cont(" ! %08x <%pS>", val, (void *)val);
}
return;
@@ -541,7 +541,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
}
- printk(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]);
+ pr_info(".word 0x%x%x%x%x", nibs[0], nibs[1], nibs[2], nibs[3]);
}
void show_code(struct pt_regs *regs)
@@ -552,20 +552,21 @@ void show_code(struct pt_regs *regs)
if (regs->pc & 0x1)
return;
- printk("Code:\n");
+ pr_info("Code:\n");
for (i = -3 ; i < 6 ; i++) {
unsigned short insn;
if (__get_user(insn, pc + i)) {
- printk(" (Bad address in pc)\n");
+ pr_err(" (Bad address in pc)\n");
break;
}
- printk("%s%08lx: ", (i ? " ": "->"), (unsigned long)(pc + i));
+ pr_info("%s%08lx: ", (i ? " " : "->"),
+ (unsigned long)(pc + i));
print_sh_insn((unsigned long)(pc + i), insn);
- printk("\n");
+ pr_cont("\n");
}
- printk("\n");
+ pr_info("\n");
}
diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c
index d4811691b93c..cd46a9825e3c 100644
--- a/arch/sh/kernel/dma-coherent.c
+++ b/arch/sh/kernel/dma-coherent.c
@@ -3,60 +3,13 @@
* Copyright (C) 2004 - 2007 Paul Mundt
*/
#include <linux/mm.h>
-#include <linux/init.h>
#include <linux/dma-noncoherent.h>
-#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/addrspace.h>
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- void *ret, *ret_nocache;
- int order = get_order(size);
-
- gfp |= __GFP_ZERO;
-
- ret = (void *)__get_free_pages(gfp, order);
- if (!ret)
- return NULL;
-
- /*
- * Pages from the page allocator may have data present in
- * cache. So flush the cache before using uncached memory.
- */
- arch_sync_dma_for_device(virt_to_phys(ret), size,
- DMA_BIDIRECTIONAL);
-
- ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size);
- if (!ret_nocache) {
- free_pages((unsigned long)ret, order);
- return NULL;
- }
-
- split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
-
- *dma_handle = virt_to_phys(ret);
- if (!WARN_ON(!dev))
- *dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
-
- return ret_nocache;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- int order = get_order(size);
- unsigned long pfn = (dma_handle >> PAGE_SHIFT);
- int k;
-
- if (!WARN_ON(!dev))
- pfn += dev->dma_pfn_offset;
-
- for (k = 0; k < (1 << order); k++)
- __free_pages(pfn_to_page(pfn + k), 0);
-
- iounmap(vaddr);
+ __flush_purge_region(page_address(page), size);
}
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c
index a13c045804ed..758a6c89e911 100644
--- a/arch/sh/kernel/dumpstack.c
+++ b/arch/sh/kernel/dumpstack.c
@@ -16,8 +16,8 @@
#include <asm/unwinder.h>
#include <asm/stacktrace.h>
-void dump_mem(const char *str, const char *loglvl,
- unsigned long bottom, unsigned long top)
+void dump_mem(const char *str, const char *loglvl, unsigned long bottom,
+ unsigned long top)
{
unsigned long p;
int i;
@@ -31,23 +31,23 @@ void dump_mem(const char *str, const char *loglvl,
unsigned int val;
if (p < bottom || p >= top)
- printk("%s ", loglvl);
+ pr_cont(" ");
else {
if (__get_user(val, (unsigned int __user *)p)) {
- printk("%s\n", loglvl);
+ pr_cont("\n");
return;
}
- printk("%s%08x ", loglvl, val);
+ pr_cont("%08x ", val);
}
}
- printk("%s\n", loglvl);
+ pr_cont("\n");
}
}
-void printk_address(unsigned long address, int reliable, const char *loglvl)
+void printk_address(unsigned long address, int reliable)
{
- printk("%s [<%p>] %s%pS\n", loglvl, (void *) address,
- reliable ? "" : "? ", (void *) address);
+ pr_cont(" [<%px>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -107,22 +107,16 @@ stack_reader_dump(struct task_struct *task, struct pt_regs *regs,
}
}
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
/*
* Print one address/symbol entries per line.
*/
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
- printk_address(addr, reliable, (char *)data);
+ printk("%s", (char *)data);
+ printk_address(addr, reliable);
}
static const struct stacktrace_ops print_trace_ops = {
- .stack = print_trace_stack,
.address = print_trace_address,
};
@@ -136,7 +130,7 @@ void show_trace(struct task_struct *tsk, unsigned long *sp,
unwind_stack(tsk, regs, sp, &print_trace_ops, (void *)loglvl);
- printk("%s\n", loglvl);
+ pr_cont("\n");
if (!tsk)
tsk = current;
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 9bac5bbb67f3..ad963104d22d 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -178,34 +178,6 @@ syscall_exit_work:
bra resume_userspace
nop
- .align 2
-syscall_trace_entry:
- ! Yes it is traced.
- mov r15, r4
- mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies
- jsr @r11 ! superior (will chomp R[0-7])
- nop
- mov.l r0, @(OFF_R0,r15) ! Save return value
- ! Reload R0-R4 from kernel stack, where the
- ! parent may have modified them using
- ! ptrace(POKEUSR). (Note that R0-R2 are
- ! reloaded from the kernel stack by syscall_call
- ! below, so don't need to be reloaded here.)
- ! This allows the parent to rewrite system calls
- ! and args on the fly.
- mov.l @(OFF_R4,r15), r4 ! arg0
- mov.l @(OFF_R5,r15), r5
- mov.l @(OFF_R6,r15), r6
- mov.l @(OFF_R7,r15), r7 ! arg3
- mov.l @(OFF_R3,r15), r3 ! syscall_nr
- !
- mov.l 6f, r10 ! Number of syscalls
- cmp/hs r10, r3
- bf syscall_call
- mov #-ENOSYS, r0
- bra syscall_exit
- mov.l r0, @(OFF_R0,r15) ! Return value
-
__restore_all:
mov #OFF_SR, r0
mov.l @(r0,r15), r0 ! get status register
@@ -388,6 +360,37 @@ syscall_exit:
bf syscall_exit_work
bra __restore_all
nop
+
+ .align 2
+syscall_trace_entry:
+ ! Yes it is traced.
+ mov r15, r4
+ mov.l 7f, r11 ! Call do_syscall_trace_enter which notifies
+ jsr @r11 ! superior (will chomp R[0-7])
+ nop
+ cmp/eq #-1, r0
+ bt syscall_exit
+ mov.l r0, @(OFF_R0,r15) ! Save return value
+ ! Reload R0-R4 from kernel stack, where the
+ ! parent may have modified them using
+ ! ptrace(POKEUSR). (Note that R0-R2 are
+ ! reloaded from the kernel stack by syscall_call
+ ! below, so don't need to be reloaded here.)
+ ! This allows the parent to rewrite system calls
+ ! and args on the fly.
+ mov.l @(OFF_R4,r15), r4 ! arg0
+ mov.l @(OFF_R5,r15), r5
+ mov.l @(OFF_R6,r15), r6
+ mov.l @(OFF_R7,r15), r7 ! arg3
+ mov.l @(OFF_R3,r15), r3 ! syscall_nr
+ !
+ mov.l 6f, r10 ! Number of syscalls
+ cmp/hs r10, r3
+ bf syscall_call
+ mov #-ENOSYS, r0
+ bra syscall_exit
+ mov.l r0, @(OFF_R0,r15) ! Return value
+
.align 2
#if !defined(CONFIG_CPU_SH2)
1: .long TRA
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index 037aab2708b7..004ad0130b10 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -102,7 +102,6 @@ int register_trapped_io(struct trapped_io *tiop)
pr_warn("unable to install trapped io filter\n");
return -1;
}
-EXPORT_SYMBOL_GPL(register_trapped_io);
void __iomem *match_trapped_io_handler(struct list_head *list,
unsigned long offset,
@@ -131,7 +130,6 @@ void __iomem *match_trapped_io_handler(struct list_head *list,
spin_unlock_irqrestore(&trapped_lock, flags);
return NULL;
}
-EXPORT_SYMBOL_GPL(match_trapped_io_handler);
static struct trapped_io *lookup_tiop(unsigned long address)
{
diff --git a/arch/sh/kernel/iomap.c b/arch/sh/kernel/iomap.c
index ef9e2c97cbb7..0a0dff4e66de 100644
--- a/arch/sh/kernel/iomap.c
+++ b/arch/sh/kernel/iomap.c
@@ -8,31 +8,31 @@
#include <linux/module.h>
#include <linux/io.h>
-unsigned int ioread8(void __iomem *addr)
+unsigned int ioread8(const void __iomem *addr)
{
return readb(addr);
}
EXPORT_SYMBOL(ioread8);
-unsigned int ioread16(void __iomem *addr)
+unsigned int ioread16(const void __iomem *addr)
{
return readw(addr);
}
EXPORT_SYMBOL(ioread16);
-unsigned int ioread16be(void __iomem *addr)
+unsigned int ioread16be(const void __iomem *addr)
{
return be16_to_cpu(__raw_readw(addr));
}
EXPORT_SYMBOL(ioread16be);
-unsigned int ioread32(void __iomem *addr)
+unsigned int ioread32(const void __iomem *addr)
{
return readl(addr);
}
EXPORT_SYMBOL(ioread32);
-unsigned int ioread32be(void __iomem *addr)
+unsigned int ioread32be(const void __iomem *addr)
{
return be32_to_cpu(__raw_readl(addr));
}
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(iowrite32be);
* convert to CPU byte order. We write in "IO byte
* order" (we also don't have IO barriers).
*/
-static inline void mmio_insb(void __iomem *addr, u8 *dst, int count)
+static inline void mmio_insb(const void __iomem *addr, u8 *dst, int count)
{
while (--count >= 0) {
u8 data = __raw_readb(addr);
@@ -83,7 +83,7 @@ static inline void mmio_insb(void __iomem *addr, u8 *dst, int count)
}
}
-static inline void mmio_insw(void __iomem *addr, u16 *dst, int count)
+static inline void mmio_insw(const void __iomem *addr, u16 *dst, int count)
{
while (--count >= 0) {
u16 data = __raw_readw(addr);
@@ -92,7 +92,7 @@ static inline void mmio_insw(void __iomem *addr, u16 *dst, int count)
}
}
-static inline void mmio_insl(void __iomem *addr, u32 *dst, int count)
+static inline void mmio_insl(const void __iomem *addr, u32 *dst, int count)
{
while (--count >= 0) {
u32 data = __raw_readl(addr);
@@ -125,19 +125,19 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count)
}
}
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
{
mmio_insb(addr, dst, count);
}
EXPORT_SYMBOL(ioread8_rep);
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
{
mmio_insw(addr, dst, count);
}
EXPORT_SYMBOL(ioread16_rep);
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
{
mmio_insl(addr, dst, count);
}
diff --git a/arch/sh/kernel/ioport.c b/arch/sh/kernel/ioport.c
index 34f8cdbbcf0b..f39446a658bd 100644
--- a/arch/sh/kernel/ioport.c
+++ b/arch/sh/kernel/ioport.c
@@ -7,6 +7,7 @@
*/
#include <linux/module.h>
#include <linux/io.h>
+#include <asm/io_trapped.h>
unsigned long sh_io_port_base __read_mostly = -1;
EXPORT_SYMBOL(sh_io_port_base);
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index 76bd8955d4fe..d606679a211e 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -65,10 +65,10 @@ static int __init early_parse_mv(char *from)
mvp = get_mv_byname(mv_name);
if (unlikely(!mvp)) {
- printk("Available vectors:\n\n\t'%s', ", sh_mv.mv_name);
+ pr_info("Available vectors:\n\n\t'%s', ", sh_mv.mv_name);
for_each_mv(mvp)
- printk("'%s', ", mvp->mv_name);
- printk("\n\n");
+ pr_cont("'%s', ", mvp->mv_name);
+ pr_cont("\n\n");
panic("Failed to select machvec '%s' -- halting.\n",
mv_name);
} else
@@ -105,7 +105,7 @@ void __init sh_mv_setup(void)
sh_mv = *(struct sh_machine_vector *)&__machvec_start;
}
- printk(KERN_NOTICE "Booting machvec: %s\n", get_system_type());
+ pr_notice("Booting machvec: %s\n", get_system_type());
/*
* Manually walk the vec, fill in anything that the board hasn't yet
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 6281f2fdf9ca..c9d3aa18732d 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -11,11 +11,6 @@
#include <asm/unwinder.h>
#include <asm/ptrace.h>
-static int callchain_stack(void *data, char *name)
-{
- return 0;
-}
-
static void callchain_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry_ctx *entry = data;
@@ -25,7 +20,6 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops callchain_ops = {
- .stack = callchain_stack,
.address = callchain_address,
};
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index a432c364b13a..80a5d1c66a51 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -30,34 +30,30 @@
void show_regs(struct pt_regs * regs)
{
- printk("\n");
+ pr_info("\n");
show_regs_print_info(KERN_DEFAULT);
- printk("PC is at %pS\n", (void *)instruction_pointer(regs));
- printk("PR is at %pS\n", (void *)regs->pr);
+ pr_info("PC is at %pS\n", (void *)instruction_pointer(regs));
+ pr_info("PR is at %pS\n", (void *)regs->pr);
- printk("PC : %08lx SP : %08lx SR : %08lx ",
- regs->pc, regs->regs[15], regs->sr);
+ pr_info("PC : %08lx SP : %08lx SR : %08lx ", regs->pc,
+ regs->regs[15], regs->sr);
#ifdef CONFIG_MMU
- printk("TEA : %08x\n", __raw_readl(MMU_TEA));
+ pr_cont("TEA : %08x\n", __raw_readl(MMU_TEA));
#else
- printk("\n");
+ pr_cont("\n");
#endif
- printk("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n",
- regs->regs[0],regs->regs[1],
- regs->regs[2],regs->regs[3]);
- printk("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n",
- regs->regs[4],regs->regs[5],
- regs->regs[6],regs->regs[7]);
- printk("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n",
- regs->regs[8],regs->regs[9],
- regs->regs[10],regs->regs[11]);
- printk("R12 : %08lx R13 : %08lx R14 : %08lx\n",
- regs->regs[12],regs->regs[13],
- regs->regs[14]);
- printk("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n",
- regs->mach, regs->macl, regs->gbr, regs->pr);
+ pr_info("R0 : %08lx R1 : %08lx R2 : %08lx R3 : %08lx\n",
+ regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3]);
+ pr_info("R4 : %08lx R5 : %08lx R6 : %08lx R7 : %08lx\n",
+ regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]);
+ pr_info("R8 : %08lx R9 : %08lx R10 : %08lx R11 : %08lx\n",
+ regs->regs[8], regs->regs[9], regs->regs[10], regs->regs[11]);
+ pr_info("R12 : %08lx R13 : %08lx R14 : %08lx\n",
+ regs->regs[12], regs->regs[13], regs->regs[14]);
+ pr_info("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n",
+ regs->mach, regs->macl, regs->gbr, regs->pr);
show_trace(NULL, (unsigned long *)regs->regs[15], regs, KERN_DEFAULT);
show_code(regs);
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 609b7c917e6e..b05bf92f9c32 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -457,8 +457,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
- secure_computing_strict(regs->regs[0]);
-
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
tracehook_report_syscall_entry(regs))
/*
@@ -468,6 +466,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
*/
ret = -1L;
+ if (secure_computing() == -1)
+ return -1;
+
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[0]);
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index 2950b19ad077..daf0b53ee066 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -15,11 +15,6 @@
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
-static int save_stack_stack(void *data, char *name)
-{
- return 0;
-}
-
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
@@ -40,7 +35,6 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops save_stack_ops = {
- .stack = save_stack_stack,
.address = save_stack_address,
};
@@ -73,7 +67,6 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops save_stack_ops_nosched = {
- .stack = save_stack_stack,
.address = save_stack_address_nosched,
};
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 96848db9659e..ae0a00beea5f 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl
+149 common _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 058c6181bb30..b62ad0ba2395 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs,
error_code = lookup_exception_vector();
#endif
- oldfs = get_fs();
-
if (user_mode(regs)) {
int si_code = BUS_ADRERR;
unsigned int user_action;
@@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs,
local_irq_enable();
inc_unaligned_user_access();
- set_fs(USER_DS);
+ oldfs = force_uaccess_begin();
if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1),
sizeof(instruction))) {
- set_fs(oldfs);
+ force_uaccess_end(oldfs);
goto uspace_segv;
}
- set_fs(oldfs);
+ force_uaccess_end(oldfs);
/* shout about userspace fixups */
unaligned_fixups_notify(current, instruction, regs);
@@ -520,11 +518,11 @@ fixup:
goto uspace_segv;
}
- set_fs(USER_DS);
+ oldfs = force_uaccess_begin();
tmp = handle_unaligned_access(instruction, regs,
&user_mem_access, 0,
address);
- set_fs(oldfs);
+ force_uaccess_end(oldfs);
if (tmp == 0)
return; /* sorted */
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index d0abbe5e38b0..eb473d373ca4 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -30,5 +30,3 @@ memset-$(CONFIG_CPU_SH4) := memset-sh4.o
lib-$(CONFIG_MMU) += copy_page.o __clear_user.o
lib-$(CONFIG_MCOUNT) += mcount.o
lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y)
-
-ccflags-y := -Werror
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index 540e670dbafc..dad8e6a54906 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -29,7 +29,6 @@ void __delay(unsigned long loops)
: "0" (loops)
: "t");
}
-EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 487da0ff03b3..f69ddc70b146 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -43,5 +43,3 @@ obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o
obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o
GCOV_PROFILE_pmb.o := n
-
-ccflags-y := -Werror
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 3169a343a5ab..0de206c1acfe 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -57,8 +57,6 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
return -ENOMEM;
}
- memset(buf, 0, memsize);
-
r->flags = IORESOURCE_MEM;
r->start = dma_handle;
r->end = r->start + memsize - 1;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index fbe1f2fe9a8c..88a1f453d73e 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -208,13 +208,12 @@ show_fault_oops(struct pt_regs *regs, unsigned long address)
if (!oops_may_print())
return;
- printk(KERN_ALERT "PC:");
pr_alert("BUG: unable to handle kernel %s at %08lx\n",
address < PAGE_SIZE ? "NULL pointer dereference"
: "paging request",
address);
pr_alert("PC:");
- printk_address(regs->pc, 1, KERN_ALERT);
+ printk_address(regs->pc, 1);
show_pte(NULL, address);
}
@@ -482,22 +481,13 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
if (mm_fault_error(regs, error_code, address, fault))
return;
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 613de8096335..4735176ab811 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -29,6 +29,7 @@
#include <asm/cache.h>
#include <asm/pgalloc.h>
#include <linux/sizes.h>
+#include "ioremap.h"
pgd_t swapper_pg_dir[PTRS_PER_PGD];
@@ -425,15 +426,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
return ret;
}
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 addr)
-{
- /* Node 0 for now.. */
- return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap)
{
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index f6d02246d665..21342581144d 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -18,12 +18,59 @@
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/io.h>
+#include <asm/io_trapped.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/addrspace.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/mmu.h>
+#include "ioremap.h"
+
+/*
+ * On 32-bit SH, we traditionally have the whole physical address space mapped
+ * at all times (as MIPS does), so "ioremap()" and "iounmap()" do not need to do
+ * anything but place the address in the proper segment. This is true for P1
+ * and P2 addresses, as well as some P3 ones. However, most of the P3 addresses
+ * and newer cores using extended addressing need to map through page tables, so
+ * the ioremap() implementation becomes a bit more complicated.
+ */
+#ifdef CONFIG_29BIT
+static void __iomem *
+__ioremap_29bit(phys_addr_t offset, unsigned long size, pgprot_t prot)
+{
+ phys_addr_t last_addr = offset + size - 1;
+
+ /*
+ * For P1 and P2 space this is trivial, as everything is already
+ * mapped. Uncached access for P1 addresses are done through P2.
+ * In the P3 case or for addresses outside of the 29-bit space,
+ * mapping must be done by the PMB or by using page tables.
+ */
+ if (likely(PXSEG(offset) < P3SEG && PXSEG(last_addr) < P3SEG)) {
+ u64 flags = pgprot_val(prot);
+
+ /*
+ * Anything using the legacy PTEA space attributes needs
+ * to be kicked down to page table mappings.
+ */
+ if (unlikely(flags & _PAGE_PCC_MASK))
+ return NULL;
+ if (unlikely(flags & _PAGE_CACHABLE))
+ return (void __iomem *)P1SEGADDR(offset);
+
+ return (void __iomem *)P2SEGADDR(offset);
+ }
+
+ /* P4 above the store queues are always mapped. */
+ if (unlikely(offset >= P3_ADDR_MAX))
+ return (void __iomem *)P4SEGADDR(offset);
+
+ return NULL;
+}
+#else
+#define __ioremap_29bit(offset, size, prot) NULL
+#endif /* CONFIG_29BIT */
/*
* Remap an arbitrary physical address space into the kernel virtual
@@ -42,6 +89,14 @@ __ioremap_caller(phys_addr_t phys_addr, unsigned long size,
unsigned long offset, last_addr, addr, orig_addr;
void __iomem *mapped;
+ mapped = __ioremap_trapped(phys_addr, size);
+ if (mapped)
+ return mapped;
+
+ mapped = __ioremap_29bit(phys_addr, size, pgprot);
+ if (mapped)
+ return mapped;
+
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
if (!size || last_addr < phys_addr)
diff --git a/arch/sh/mm/ioremap.h b/arch/sh/mm/ioremap.h
new file mode 100644
index 000000000000..f2544e721a35
--- /dev/null
+++ b/arch/sh/mm/ioremap.h
@@ -0,0 +1,23 @@
+#ifndef _SH_MM_IORMEMAP_H
+#define _SH_MM_IORMEMAP_H 1
+
+#ifdef CONFIG_IOREMAP_FIXED
+void __iomem *ioremap_fixed(phys_addr_t, unsigned long, pgprot_t);
+int iounmap_fixed(void __iomem *);
+void ioremap_fixed_init(void);
+#else
+static inline void __iomem *
+ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
+{
+ BUG();
+ return NULL;
+}
+static inline void ioremap_fixed_init(void)
+{
+}
+static inline int iounmap_fixed(void __iomem *addr)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_IOREMAP_FIXED */
+#endif /* _SH_MM_IORMEMAP_H */
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index aab3f82856bb..136113bcac25 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -23,6 +23,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include "ioremap.h"
struct ioremap_map {
void __iomem *addr;
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
index 5c8f9247c3c2..cf7ce4b57359 100644
--- a/arch/sh/mm/pgtable.c
+++ b/arch/sh/mm/pgtable.c
@@ -2,8 +2,6 @@
#include <linux/mm.h>
#include <linux/slab.h>
-#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO
-
static struct kmem_cache *pgd_cachep;
#if PAGETABLE_LEVELS > 2
static struct kmem_cache *pmd_cachep;
@@ -13,6 +11,7 @@ void pgd_ctor(void *x)
{
pgd_t *pgd = x;
+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
memcpy(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
@@ -32,7 +31,7 @@ void pgtable_cache_init(void)
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP);
+ return kmem_cache_alloc(pgd_cachep, GFP_KERNEL);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -48,7 +47,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
- return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP);
+ return kmem_cache_alloc(pmd_cachep, GFP_KERNEL | __GFP_ZERO);
}
void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/sh/oprofile/backtrace.c b/arch/sh/oprofile/backtrace.c
index f1205f92631d..cc16cf86cd92 100644
--- a/arch/sh/oprofile/backtrace.c
+++ b/arch/sh/oprofile/backtrace.c
@@ -19,12 +19,6 @@
#include <asm/sections.h>
#include <asm/stacktrace.h>
-static int backtrace_stack(void *data, char *name)
-{
- /* Yes, we want all stacks */
- return 0;
-}
-
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
unsigned int *depth = data;
@@ -34,7 +28,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
}
static struct stacktrace_ops backtrace_ops = {
- .stack = backtrace_stack,
.address = backtrace_address,
};
diff --git a/arch/sh/tools/mach-types b/arch/sh/tools/mach-types
index 569977e52c91..29e648855d50 100644
--- a/arch/sh/tools/mach-types
+++ b/arch/sh/tools/mach-types
@@ -46,7 +46,6 @@ X3PROTO SH_X3PROTO
MAGICPANELR2 SH_MAGIC_PANEL_R2
R2D_PLUS RTS7751R2D_PLUS
R2D_1 RTS7751R2D_1
-CAYMAN SH_CAYMAN
SDK7780 SH_SDK7780
MIGOR SH_MIGOR
RSK7201 SH_RSK7201
diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h
index 1dd1b61432db..aa9a676bc341 100644
--- a/arch/sparc/include/asm/sparsemem.h
+++ b/arch/sparc/include/asm/sparsemem.h
@@ -7,7 +7,6 @@
#include <asm/page.h>
#define SECTION_SIZE_BITS 30
-#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS
#define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS
#endif /* !(__KERNEL__) */
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index d6d8413eca83..0a2d3ebc4bb8 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -28,7 +28,7 @@
#define get_fs() (current->thread.current_ds)
#define set_fs(val) ((current->thread.current_ds) = (val))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
/* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test
* can be fairly lightweight.
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index bf9d330073b2..698cf69f74e9 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -32,7 +32,7 @@
#define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define set_fs(val) \
do { \
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 46024e80ee86..4af114e84f20 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -300,7 +300,7 @@
249 64 nanosleep sys_nanosleep
250 32 mremap sys_mremap
250 64 mremap sys_64_mremap
-251 common _sysctl sys_sysctl compat_sys_sysctl
+251 common _sysctl sys_ni_syscall
252 common getsid sys_getsid
253 common fdatasync sys_fdatasync
254 32 nfsservctl sys_ni_syscall sys_nis_syscall
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index cfef656eda0f..8071bfd72349 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -234,7 +234,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -250,15 +250,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, address);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
@@ -410,7 +401,7 @@ good_area:
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
- switch (handle_mm_fault(vma, address, flags)) {
+ switch (handle_mm_fault(vma, address, flags, NULL)) {
case VM_FAULT_SIGBUS:
case VM_FAULT_OOM:
goto do_sigbus;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index a3806614e4dc..0a6bcc85fba7 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -422,7 +422,7 @@ good_area:
goto bad_area;
}
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
goto exit_exception;
@@ -438,15 +438,6 @@ good_area:
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
- 1, regs, address);
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
- 1, regs, address);
- }
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index ef69be17ff70..eb51fec75948 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -14,6 +14,7 @@ config UML
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_BUGVERBOSE
+ select NO_DMA
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
@@ -167,9 +168,6 @@ config MMAPPER
This driver allows a host file to be used as emulated IO memory inside
UML.
-config NO_DMA
- def_bool y
-
config PGTABLE_LEVELS
int
default 3 if 3_LEVEL_PGTABLES
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 2b3afa354a90..ad12f78bda7e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -71,7 +71,7 @@ good_area:
do {
vm_fault_t fault;
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, NULL);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
goto out_nosemaphore;
@@ -88,10 +88,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9a2849527dd7..7101ac64bb20 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -209,6 +209,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 29b7d52143e9..df8c017e6161 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -449,8 +449,6 @@
.macro SWITCH_TO_KERNEL_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
BUG_IF_WRONG_CR3
SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@@ -599,8 +597,6 @@
*/
.macro SWITCH_TO_ENTRY_STACK
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
/* Bytes to copy */
movl $PTREGS_SIZE, %ecx
@@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
* will ignore all of the single-step traps generated in this range.
*/
-#ifdef CONFIG_XEN_PV
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-SYM_CODE_START(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp .Lsysenter_past_esp
-SYM_CODE_END(xen_sysenter_target)
-#endif
-
/*
* 32-bit SYSENTER entry.
*
@@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
movl %esp, %eax
call do_SYSENTER_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ testl %eax, %eax
+ jz .Lsyscall_32_done
STACKLEAK_ERASE
@@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32)
#endif
.endm
-#ifdef CONFIG_PARAVIRT
-SYM_CODE_START(native_iret)
- iret
- _ASM_EXTABLE(native_iret, asm_iret_error)
-SYM_CODE_END(native_iret)
-#endif
-
-#ifdef CONFIG_XEN_PV
-/*
- * See comment in entry_64.S for further explanation
- *
- * Note: This is not an actual IDT entry point. It's a XEN specific entry
- * point and therefore named to match the 64-bit trampoline counterpart.
- */
-SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
- /*
- * Check to see if we got the event in the critical
- * region in xen_iret_direct, after we've reenabled
- * events and checked for pending events. This simulates
- * iret instruction's behaviour where it delivers a
- * pending interrupt when enabling interrupts:
- */
- cmpl $xen_iret_start_crit, (%esp)
- jb 1f
- cmpl $xen_iret_end_crit, (%esp)
- jae 1f
- call xen_iret_crit_fixup
-1:
- pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
-
- mov %esp, %eax
- call xen_pv_evtchn_do_upcall
- jmp handle_exception_return
-SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- * 1. Fault while reloading DS, ES, FS or GS
- * 2. Fault while executing IRET
- * Category 1 we fix up by reattempting the load, and zeroing the segment
- * register if the load fails.
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by maintaining a status value in EAX.
- */
-SYM_FUNC_START(xen_failsafe_callback)
- pushl %eax
- movl $1, %eax
-1: mov 4(%esp), %ds
-2: mov 8(%esp), %es
-3: mov 12(%esp), %fs
-4: mov 16(%esp), %gs
- /* EAX == 0 => Category 1 (Bad segment)
- EAX != 0 => Category 2 (Bad IRET) */
- testl %eax, %eax
- popl %eax
- lea 16(%esp), %esp
- jz 5f
- jmp asm_iret_error
-5: pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- jmp handle_exception_return
-
-.section .fixup, "ax"
-6: xorl %eax, %eax
- movl %eax, 4(%esp)
- jmp 1b
-7: xorl %eax, %eax
- movl %eax, 8(%esp)
- jmp 2b
-8: xorl %eax, %eax
- movl %eax, 12(%esp)
- jmp 3b
-9: xorl %eax, %eax
- movl %eax, 16(%esp)
- jmp 4b
-.previous
- _ASM_EXTABLE(1b, 6b)
- _ASM_EXTABLE(2b, 7b)
- _ASM_EXTABLE(3b, 8b)
- _ASM_EXTABLE(4b, 9b)
-SYM_FUNC_END(xen_failsafe_callback)
-#endif /* CONFIG_XEN_PV */
-
SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index e31a75262c9c..9d1102873666 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -160,7 +160,7 @@
146 i386 writev sys_writev compat_sys_writev
147 i386 getsid sys_getsid
148 i386 fdatasync sys_fdatasync
-149 i386 _sysctl sys_sysctl compat_sys_sysctl
+149 i386 _sysctl sys_ni_syscall
150 i386 mlock sys_mlock
151 i386 munlock sys_munlock
152 i386 mlockall sys_mlockall
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 9d82078c949a..f30d6ae9a688 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
153 common vhangup sys_vhangup
154 common modify_ldt sys_modify_ldt
155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
+156 64 _sysctl sys_ni_syscall
157 common prctl sys_prctl
158 common arch_prctl sys_arch_prctl
159 common adjtimex sys_adjtimex
diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S
index e78047d119f6..2cbd39939dc6 100644
--- a/arch/x86/entry/vdso/vdso32/note.S
+++ b/arch/x86/entry/vdso/vdso32/note.S
@@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a")
ELFNOTE_END
BUILD_SALT
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently. This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- * hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word. So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
-
-ELFNOTE_START(GNU, 2, "a")
- .long 1 /* ncaps */
-VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
- .long 0 /* mask */
- .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
-ELFNOTE_END
-#endif
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 68b38820b10e..67b411f7e8c4 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -130,11 +130,17 @@ struct rapl_pmus {
struct rapl_pmu *pmus[];
};
+enum rapl_unit_quirk {
+ RAPL_UNIT_QUIRK_NONE,
+ RAPL_UNIT_QUIRK_INTEL_HSW,
+ RAPL_UNIT_QUIRK_INTEL_SPR,
+};
+
struct rapl_model {
struct perf_msr *rapl_msrs;
unsigned long events;
unsigned int msr_power_unit;
- bool apply_quirk;
+ enum rapl_unit_quirk unit_quirk;
};
/* 1/2^hw_unit Joule */
@@ -612,14 +618,28 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ switch (rm->unit_quirk) {
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
* different than the unit from power unit MSR. See
* "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
- if (rm->apply_quirk)
+ case RAPL_UNIT_QUIRK_INTEL_HSW:
+ rapl_hw_unit[PERF_RAPL_RAM] = 16;
+ break;
+ /*
+ * SPR shares the same DRAM domain energy unit as HSW, plus it
+ * also has a fixed energy unit for Psys domain.
+ */
+ case RAPL_UNIT_QUIRK_INTEL_SPR:
rapl_hw_unit[PERF_RAPL_RAM] = 16;
+ rapl_hw_unit[PERF_RAPL_PSYS] = 0;
+ break;
+ default:
+ break;
+ }
+
/*
* Calculate the timer rate:
@@ -665,7 +685,7 @@ static const struct attribute_group *rapl_attr_update[] = {
&rapl_events_pkg_group,
&rapl_events_ram_group,
&rapl_events_gpu_group,
- &rapl_events_gpu_group,
+ &rapl_events_psys_group,
NULL,
};
@@ -698,7 +718,6 @@ static struct rapl_model model_snb = {
.events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_PP1),
- .apply_quirk = false,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
@@ -707,7 +726,6 @@ static struct rapl_model model_snbep = {
.events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
- .apply_quirk = false,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
@@ -717,7 +735,6 @@ static struct rapl_model model_hsw = {
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1),
- .apply_quirk = false,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
@@ -726,7 +743,7 @@ static struct rapl_model model_hsx = {
.events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
- .apply_quirk = true,
+ .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
@@ -734,7 +751,7 @@ static struct rapl_model model_hsx = {
static struct rapl_model model_knl = {
.events = BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
- .apply_quirk = true,
+ .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
@@ -745,14 +762,22 @@ static struct rapl_model model_skl = {
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1) |
BIT(PERF_RAPL_PSYS),
- .apply_quirk = false,
+ .msr_power_unit = MSR_RAPL_POWER_UNIT,
+ .rapl_msrs = intel_rapl_msrs,
+};
+
+static struct rapl_model model_spr = {
+ .events = BIT(PERF_RAPL_PP0) |
+ BIT(PERF_RAPL_PKG) |
+ BIT(PERF_RAPL_RAM) |
+ BIT(PERF_RAPL_PSYS),
+ .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
.rapl_msrs = intel_rapl_msrs,
};
static struct rapl_model model_amd_fam17h = {
.events = BIT(PERF_RAPL_PKG),
- .apply_quirk = false,
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
.rapl_msrs = amd_rapl_msrs,
};
@@ -787,6 +812,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
{},
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 60b944dd2df1..4f77b8f22e54 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -8,6 +8,7 @@
#include <asm/io.h>
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
+#include <asm/paravirt.h>
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
@@ -54,6 +55,17 @@ typedef int (*hyperv_fill_flush_list_func)(
vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
#define hv_get_raw_timer() rdtsc_ordered()
+/*
+ * Reference to pv_ops must be inline so objtool
+ * detection of noinstr violations can work correctly.
+ */
+static __always_inline void hv_setup_sched_clock(void *sched_clock)
+{
+#ifdef CONFIG_PARAVIRT
+ pv_ops.time.sched_clock = sched_clock;
+#endif
+}
+
void hyperv_vector_handler(struct pt_regs *regs);
static inline void hv_enable_stimer0_percpu_irq(int irq) {}
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6e81788a30c1..28996fe19301 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
#endif
#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 6669164abadc..9646c300f128 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2f3e8f2a958f..ecefaffd15d4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs)
set_thread_flag(TIF_FSCHECK);
}
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (current->thread.addr_limit.seg)
/*
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index fb81fea99093..df01d7349d79 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -241,7 +241,8 @@ static u64 vread_hvclock(void)
}
#endif
-static inline u64 __arch_get_hw_counter(s32 clock_mode)
+static inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
return (u64)rdtsc_ordered();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d1175533d125..c3daf0aaa0ee 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -875,8 +875,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
- local_irq_save(flags);
-
/*
* Map the page without the global bit, as TLB flushing is done with
* flush_tlb_mm_range(), which is intended for non-global PTEs.
@@ -893,6 +891,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
VM_BUG_ON(!ptep);
+ local_irq_save(flags);
+
pte = mk_pte(pages[0], pgprot);
set_pte_at(poking_mm, poking_addr, ptep, pte);
@@ -942,8 +942,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
*/
BUG_ON(memcmp(addr, opcode, len));
- pte_unmap_unlock(ptep, ptl);
local_irq_restore(flags);
+ pte_unmap_unlock(ptep, ptl);
return addr;
}
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
index 1da9b1c9a2db..0b2c03943ac6 100644
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -11,14 +11,15 @@
#include <linux/interrupt.h>
#include <asm/apic.h>
+#include <asm/cpufeatures.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/idtentry.h>
#include <asm/irq_regs.h>
-static uint32_t __init acrn_detect(void)
+static u32 __init acrn_detect(void)
{
- return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+ return hypervisor_cpuid_base("ACRNACRNACRN", 0);
}
static void __init acrn_init_platform(void)
@@ -29,12 +30,7 @@ static void __init acrn_init_platform(void)
static bool acrn_x2apic_available(void)
{
- /*
- * x2apic is not supported for now. Future enablement will have to check
- * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
- * guest.
- */
- return false;
+ return boot_cpu_has(X86_FEATURE_X2APIC);
}
static void (*acrn_intr_handler)(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f0b743a2fe9c..d3f0db463f96 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -31,6 +31,7 @@
#include <asm/intel-family.h>
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
+#include <asm/tlbflush.h>
#include "cpu.h"
@@ -1549,7 +1550,12 @@ static ssize_t l1tf_show_state(char *buf)
static ssize_t itlb_multihit_show_state(char *buf)
{
- if (itlb_multihit_kvm_mitigation)
+ if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+ !boot_cpu_has(X86_FEATURE_VMX))
+ return sprintf(buf, "KVM: Mitigation: VMX unsupported\n");
+ else if (!(cr4_read_shadow() & X86_CR4_VMXE))
+ return sprintf(buf, "KVM: Mitigation: VMX disabled\n");
+ else if (itlb_multihit_kvm_mitigation)
return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
else
return sprintf(buf, "KVM: Vulnerable\n");
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index af94f05a5c66..31125448b174 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -361,13 +361,6 @@ static void __init ms_hyperv_init_platform(void)
#endif
}
-void hv_setup_sched_clock(void *sched_clock)
-{
-#ifdef CONFIG_PARAVIRT
- pv_ops.time.sched_clock = sched_clock;
-#endif
-}
-
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index fd87b59452a3..a8f3af257e26 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -230,7 +230,7 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
int ret = 0;
/* Exclude the low 1M because it is always reserved */
- ret = crash_exclude_mem_range(cmem, 0, 1<<20);
+ ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
if (ret)
return ret;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 7a2bf884fede..038e19c0019e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -611,6 +611,10 @@ static void check_xstate_against_struct(int nr)
* This essentially double-checks what the cpu told us about
* how large the XSAVE buffer needs to be. We are recalculating
* it to be safe.
+ *
+ * Dynamic XSAVE features allocate their own buffers and are not
+ * covered by these checks. Only the size of the buffer for task->fpu
+ * is checked here.
*/
static void do_extra_xstate_size_checks(void)
{
@@ -673,6 +677,33 @@ static unsigned int __init get_xsaves_size(void)
return ebx;
}
+/*
+ * Get the total size of the enabled xstates without the dynamic supervisor
+ * features.
+ */
+static unsigned int __init get_xsaves_size_no_dynamic(void)
+{
+ u64 mask = xfeatures_mask_dynamic();
+ unsigned int size;
+
+ if (!mask)
+ return get_xsaves_size();
+
+ /* Disable dynamic features. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+
+ /*
+ * Ask the hardware what size is required of the buffer.
+ * This is the size required for the task->fpu buffer.
+ */
+ size = get_xsaves_size();
+
+ /* Re-enable dynamic features so XSAVES will work on them again. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+
+ return size;
+}
+
static unsigned int __init get_xsave_size(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -710,7 +741,7 @@ static int __init init_xstate_size(void)
xsave_size = get_xsave_size();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- possible_xstate_size = get_xsaves_size();
+ possible_xstate_size = get_xsaves_size_no_dynamic();
else
possible_xstate_size = xsave_size;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f66a6b90f954..7ed84c282233 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -134,38 +134,7 @@ SYM_CODE_START(startup_32)
movl %eax,pa(initial_page_table+0xffc)
#endif
-#ifdef CONFIG_PARAVIRT
- /* This is can only trip for a broken bootloader... */
- cmpw $0x207, pa(boot_params + BP_version)
- jb .Ldefault_entry
-
- /* Paravirt-compatible boot parameters. Look to see what architecture
- we're booting under. */
- movl pa(boot_params + BP_hardware_subarch), %eax
- cmpl $num_subarch_entries, %eax
- jae .Lbad_subarch
-
- movl pa(subarch_entries)(,%eax,4), %eax
- subl $__PAGE_OFFSET, %eax
- jmp *%eax
-
-.Lbad_subarch:
-SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
- /* Unknown implementation; there's really
- nothing we can do at this point. */
- ud2a
-
- __INITDATA
-
-subarch_entries:
- .long .Ldefault_entry /* normal x86/PC */
- .long xen_entry /* Xen hypervisor */
- .long .Ldefault_entry /* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#else
jmp .Ldefault_entry
-#endif /* CONFIG_PARAVIRT */
SYM_CODE_END(startup_32)
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d6f946707270..9afefe325acb 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -390,7 +390,7 @@ unsigned long x86_fsgsbase_read_task(struct task_struct *task,
*/
mutex_lock(&task->mm->context.lock);
ldt = task->mm->context.ldt;
- if (unlikely(idx >= ldt->nr_entries))
+ if (unlikely(!ldt || idx >= ldt->nr_entries))
base = 0;
else
base = get_desc_base(ldt->entries + idx);
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 46c72f2ec32f..6555a857a1e6 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -134,10 +134,15 @@ static const struct freq_desc freq_desc_ann = {
.mask = 0x0f,
};
-/* 24 MHz crystal? : 24 * 13 / 4 = 78 MHz */
+/*
+ * 24 MHz crystal? : 24 * 13 / 4 = 78 MHz
+ * Frequency step for Lightning Mountain SoC is fixed to 78 MHz,
+ * so all the frequency entries are 78000.
+ */
static const struct freq_desc freq_desc_lgm = {
.use_msr_plat = true,
- .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
+ .freqs = { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000,
+ 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 },
.mask = 0x0f,
};
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fa873e3e6e90..3fd6eec202d7 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -370,7 +370,8 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_7_EDX,
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM)
+ F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
+ F(SERIALIZE)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index af9cdb426dd2..814d3aee5cef 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -900,6 +900,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
synic->active = true;
synic->dont_zero_synic_pages = dont_zero_synic_pages;
+ synic->control = HV_SYNIC_CONTROL_ENABLE;
return 0;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 232dd2f9a081..599d73206299 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -820,22 +820,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
return 1;
- if (cr0 & X86_CR0_PG) {
#ifdef CONFIG_X86_64
- if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) {
- int cs_db, cs_l;
+ if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
+ (cr0 & X86_CR0_PG)) {
+ int cs_db, cs_l;
- if (!is_pae(vcpu))
- return 1;
- kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
- if (cs_l)
- return 1;
- } else
-#endif
- if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
- !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
+ if (!is_pae(vcpu))
+ return 1;
+ kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ if (cs_l)
return 1;
}
+#endif
+ if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
+ is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
+ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
+ return 1;
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
return 1;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 0c7643d9f7cb..35f1498e9832 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs,
struct vm_area_struct *vma;
struct task_struct *tsk;
struct mm_struct *mm;
- vm_fault_t fault, major = 0;
+ vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
tsk = current;
@@ -1291,8 +1291,7 @@ good_area:
* userland). The return to userland is identified whenever
* FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
*/
- fault = handle_mm_fault(vma, address, flags);
- major |= fault & VM_FAULT_MAJOR;
+ fault = handle_mm_fault(vma, address, flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -1319,18 +1318,6 @@ good_area:
return;
}
- /*
- * Major/minor page fault accounting. If any of the events
- * returned VM_FAULT_MAJOR, we account it as a major fault.
- */
- if (major) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
-
check_v8086_mode(regs, address, tsk);
}
NOKPROBE_SYMBOL(do_user_addr_fault);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3b246ae40c8f..a4ac13cc3fdc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void)
goto done;
}
+ /*
+ * Use max block size to minimize overhead on bare metal, where
+ * alignment for memory hotplug isn't a concern.
+ */
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ bz = MAX_BLOCK_SIZE;
+ goto done;
+ }
+
/* Find the largest allowed block size that aligns to memory end */
for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
if (IS_ALIGNED(boot_mem_end, bz))
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index b05f45e5e8e2..aa76ec2d359b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start)
nid = numa_meminfo.blk[0].nid;
return nid;
}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 183ac60e5990..95ea17a9d20c 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -32,7 +32,7 @@ KCOV_INSTRUMENT := n
# make up the standalone purgatory.ro
PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
-PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0
PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
PURGATORY_CFLAGS += -fno-stack-protector
@@ -64,6 +64,9 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
+AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
+AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1aded63a95cb..218acbd5c7a0 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -19,6 +19,7 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
+ depends on X86_64
select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
@@ -50,7 +51,7 @@ config XEN_PVHVM_SMP
config XEN_512GB
bool "Limit Xen pv-domain memory to 512GB"
- depends on XEN_PV && X86_64
+ depends on XEN_PV
default y
help
Limit paravirtualized user domains to 512GB of RAM.
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 5f1db522d06b..fc5c5ba4aacb 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-asm.o := y
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
@@ -33,7 +33,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o
obj-$(CONFIG_XEN_PV) += irq.o
obj-$(CONFIG_XEN_PV) += multicalls.o
obj-$(CONFIG_XEN_PV) += xen-asm.o
-obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 1aff4ae65655..e82fd1910dae 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -60,10 +60,6 @@ static u32 xen_apic_read(u32 reg)
if (reg == APIC_LVR)
return 0x14;
-#ifdef CONFIG_X86_32
- if (reg == APIC_LDR)
- return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-#endif
if (reg != APIC_ID)
return 0;
@@ -129,14 +125,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
-#ifdef CONFIG_X86_32
-static int xen_x86_32_early_logical_apicid(int cpu)
-{
- /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
- return 1 << cpu;
-}
-#endif
-
static void xen_noop(void)
{
}
@@ -199,11 +187,6 @@ static struct apic xen_pv_apic = {
.icr_write = xen_apic_icr_write,
.wait_icr_idle = xen_noop,
.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
-
-#ifdef CONFIG_X86_32
- /* generic_processor_info and setup_local_APIC. */
- .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid,
-#endif
};
static void __init xen_apic_check(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 2aab43a13a8c..22e741e0b10c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -119,14 +119,6 @@ static void __init xen_banner(void)
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-
-#ifdef CONFIG_X86_32
- pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
- "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
- "from the Linux kernel!\n"
- "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
- "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
-#endif
}
static void __init xen_pv_init_platform(void)
@@ -353,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte_t *ptep;
pte_t pte;
unsigned long pfn;
- struct page *page;
unsigned char dummy;
+ void *va;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
- page = pfn_to_page(pfn);
-
pte = pfn_pte(pfn, prot);
/*
@@ -391,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot)
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
- if (!PageHighMem(page)) {
- void *av = __va(PFN_PHYS(pfn));
+ va = __va(PFN_PHYS(pfn));
- if (av != v)
- if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
- BUG();
- } else
- kmap_flush_unused();
+ if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
preempt_enable();
}
@@ -538,30 +524,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone
- * and lazy gs handling is enabled, it means we're in a
- * context switch, and %gs has just been saved. This means we
- * can zero it out to prevent faults on exit from the
- * hypervisor if the next process has no %gs. Either way, it
- * has been saved, and the new value will get loaded properly.
- * This will go away as soon as Xen has been modified to not
- * save/restore %gs for normal hypercalls.
- *
- * On x86_64, this hack is not used for %gs, because gs points
- * to KERNEL_GS_BASE (and uses it for PDA references), so we
- * must not zero %gs on x86_64
- *
- * For x86_64, we need to zero %fs, otherwise we may get an
+ * In lazy mode we need to zero %fs, otherwise we may get an
* exception between the new %fs descriptor being loaded and
* %fs being effectively cleared at __switch_to().
*/
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
- lazy_load_gs(0);
-#else
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(fs, 0);
-#endif
- }
xen_mc_batch();
@@ -572,13 +540,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
BUG();
}
-#endif
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
@@ -597,7 +563,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
-#ifdef CONFIG_X86_64
void noist_exc_debug(struct pt_regs *regs);
DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
@@ -697,7 +662,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
return true;
}
-#endif
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
@@ -710,10 +674,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->vector = vector;
addr = gate_offset(val);
-#ifdef CONFIG_X86_64
if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#endif /* CONFIG_X86_64 */
info->address = addr;
info->cs = gate_segment(val);
@@ -958,15 +920,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
-#ifdef CONFIG_X86_64
unsigned int which;
u64 base;
-#endif
ret = 0;
switch (msr) {
-#ifdef CONFIG_X86_64
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@@ -976,7 +935,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
if (HYPERVISOR_set_segment_base(which, base) != 0)
ret = -EIO;
break;
-#endif
case MSR_STAR:
case MSR_CSTAR:
@@ -1058,9 +1016,7 @@ void __init xen_setup_vcpu_info_placement(void)
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,
-#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
.name = "Xen",
};
@@ -1086,18 +1042,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_pmc = xen_read_pmc,
.iret = xen_iret,
-#ifdef CONFIG_X86_64
.usergs_sysret64 = xen_sysret64,
-#endif
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
.load_gs_index = xen_load_gs_index,
-#endif
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
@@ -1364,15 +1316,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* keep using Xen gdt for now; no urgent need to change it */
-#ifdef CONFIG_X86_32
- pv_info.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-#else
pv_info.kernel_rpl = 0;
-#endif
- /* set the limit of our address space */
- xen_reserve_top();
/*
* We used to do this in xen_arch_setup, but that is too late
@@ -1384,12 +1328,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
-#ifdef CONFIG_X86_32
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
- new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1458,12 +1396,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_efi_init(&boot_params);
/* Start the world */
-#ifdef CONFIG_X86_32
- i386_start_kernel();
-#else
cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
}
static int xen_cpu_up_prepare_pv(unsigned int cpu)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a58d9c69807a..3273c985d3dd 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -86,19 +86,8 @@
#include "mmu.h"
#include "debugfs.h"
-#ifdef CONFIG_X86_32
-/*
- * Identity map, in addition to plain kernel map. This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
-static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-#endif
-#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
if (!xen_batched_set_pte(ptep, pteval)) {
/*
* Could call native_set_pte() here and trap and
- * emulate the PTE write but with 32-bit guests this
- * needs two traps (one for each of the two 32-bit
- * words in the PTE) so do one hypercall directly
- * instead.
+ * emulate the PTE write, but a hypercall is much cheaper.
*/
struct mmu_update u;
@@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
xen_set_pud_hyper(ptr, val);
}
-#ifdef CONFIG_X86_PAE
-static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- trace_xen_mmu_set_pte_atomic(ptep, pte);
- __xen_set_pte(ptep, pte);
-}
-
-static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- trace_xen_mmu_pte_clear(mm, addr, ptep);
- __xen_set_pte(ptep, native_make_pte(0));
-}
-
-static void xen_pmd_clear(pmd_t *pmdp)
-{
- trace_xen_mmu_pmd_clear(pmdp);
- set_pmd(pmdp, __pmd(0));
-}
-#endif /* CONFIG_X86_PAE */
-
__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
pmd = pte_pfn_to_mfn(pmd);
@@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
-#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
@@ -571,27 +536,27 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
-#endif /* CONFIG_X86_64 */
-static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
for (i = 0; i < nr; i++) {
if (!pmd_none(pmd[i]))
- flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+ (*func)(mm, pmd_page(pmd[i]), PT_PTE);
}
- return flush;
}
-static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
for (i = 0; i < nr; i++) {
@@ -602,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
pmd = pmd_offset(&pud[i], 0);
if (PTRS_PER_PMD > 1)
- flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
- flush |= xen_pmd_walk(mm, pmd, func,
- last && i == nr - 1, limit);
+ (*func)(mm, virt_to_page(pmd), PT_PMD);
+ xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
}
- return flush;
}
-static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
- int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
- bool last, unsigned long limit)
+static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ bool last, unsigned long limit)
{
- int flush = 0;
pud_t *pud;
if (p4d_none(*p4d))
- return flush;
+ return;
pud = pud_offset(p4d, 0);
if (PTRS_PER_PUD > 1)
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
- flush |= xen_pud_walk(mm, pud, func, last, limit);
- return flush;
+ (*func)(mm, virt_to_page(pud), PT_PUD);
+ xen_pud_walk(mm, pud, func, last, limit);
}
/*
@@ -636,32 +598,27 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
* will be STACK_TOP_MAX, but at boot we need to pin up to
* FIXADDR_TOP.
*
- * For 32-bit the important bit is that we don't pin beyond there,
- * because then we start getting into Xen's ptes.
- *
- * For 64-bit, we must skip the Xen hole in the middle of the address
- * space, just after the big x86-64 virtual hole.
+ * We must skip the Xen hole in the middle of the address space, just after
+ * the big x86-64 virtual hole.
*/
-static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- int i, nr, flush = 0;
+ int i, nr;
unsigned hole_low = 0, hole_high = 0;
/* The limit is the last byte to be touched */
limit--;
BUG_ON(limit >= FIXADDR_TOP);
-#ifdef CONFIG_X86_64
/*
* 64-bit has a great big hole in the middle of the address
* space, which contains the Xen mappings.
*/
hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
hole_high = pgd_index(GUARD_HOLE_END_ADDR);
-#endif
nr = pgd_index(limit) + 1;
for (i = 0; i < nr; i++) {
@@ -674,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
continue;
p4d = p4d_offset(&pgd[i], 0);
- flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
+ xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}
/* Do the top level last, so that the callbacks can use it as
a cue to do final things like tlb flushes. */
- flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
-
- return flush;
+ (*func)(mm, virt_to_page(pgd), PT_PGD);
}
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static void xen_pgd_walk(struct mm_struct *mm,
+ void (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- return __xen_pgd_walk(mm, mm->pgd, func, limit);
+ __xen_pgd_walk(mm, mm->pgd, func, limit);
}
/* If we're using split pte locks, then take the page's lock and
@@ -722,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
xen_extend_mmuext_op(&op);
}
-static int xen_pin_page(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void xen_pin_page(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
unsigned pgfl = TestSetPagePinned(page);
- int flush;
-
- if (pgfl)
- flush = 0; /* already pinned */
- else if (PageHighMem(page))
- /* kmaps need flushing if we found an unpinned
- highpage */
- flush = 1;
- else {
+
+ if (!pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
struct multicall_space mcs = __xen_mc_entry(0);
spinlock_t *ptl;
- flush = 0;
-
/*
* We need to hold the pagetable lock between the time
* we make the pagetable RO and when we actually pin
@@ -778,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
-
- return flush;
}
/* This is called just after a mm has been created, but it has not
@@ -787,39 +731,22 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
+ pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
trace_xen_mmu_pgd_pin(mm, pgd);
xen_mc_batch();
- if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
- /* re-enable interrupts for flushing */
- xen_mc_issue(0);
+ __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
- kmap_flush_unused();
+ xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
- xen_mc_batch();
+ if (user_pgd) {
+ xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
+ xen_do_pin(MMUEXT_PIN_L4_TABLE,
+ PFN_DOWN(__pa(user_pgd)));
}
-#ifdef CONFIG_X86_64
- {
- pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
- xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
-
- if (user_pgd) {
- xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
- xen_do_pin(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa(user_pgd)));
- }
- }
-#else /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PAE
- /* Need to make sure unshared kernel PMD is pinnable */
- xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
- PT_PMD);
-#endif
- xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
-#endif /* CONFIG_X86_64 */
xen_mc_issue(0);
}
@@ -854,11 +781,10 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock);
}
-static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
SetPagePinned(page);
- return 0;
}
/*
@@ -870,18 +796,16 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
static void __init xen_after_bootmem(void)
{
static_branch_enable(&xen_struct_pages_ready);
-#ifdef CONFIG_X86_64
SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
-static int xen_unpin_page(struct mm_struct *mm, struct page *page,
- enum pt_level level)
+static void xen_unpin_page(struct mm_struct *mm, struct page *page,
+ enum pt_level level)
{
unsigned pgfl = TestClearPagePinned(page);
- if (pgfl && !PageHighMem(page)) {
+ if (pgfl) {
void *pt = lowmem_page_address(page);
unsigned long pfn = page_to_pfn(page);
spinlock_t *ptl = NULL;
@@ -912,36 +836,24 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
xen_mc_callback(xen_pte_unlock, ptl);
}
}
-
- return 0; /* never need to flush on unpin */
}
/* Release a pagetables pages back as normal RW */
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
+ pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
trace_xen_mmu_pgd_unpin(mm, pgd);
xen_mc_batch();
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-#ifdef CONFIG_X86_64
- {
- pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
- if (user_pgd) {
- xen_do_pin(MMUEXT_UNPIN_TABLE,
- PFN_DOWN(__pa(user_pgd)));
- xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
- }
+ if (user_pgd) {
+ xen_do_pin(MMUEXT_UNPIN_TABLE,
+ PFN_DOWN(__pa(user_pgd)));
+ xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
}
-#endif
-
-#ifdef CONFIG_X86_PAE
- /* Need to make sure unshared kernel PMD is unpinned */
- xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
- PT_PMD);
-#endif
__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
@@ -1089,7 +1001,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
BUG();
}
-#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
unsigned long vaddr_end)
{
@@ -1273,17 +1184,15 @@ static void __init xen_pagetable_cleanhighmap(void)
xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
}
-#endif
static void __init xen_pagetable_p2m_setup(void)
{
xen_vmalloc_p2m_tree();
-#ifdef CONFIG_X86_64
xen_pagetable_p2m_free();
xen_pagetable_cleanhighmap();
-#endif
+
/* And revector! Bye bye old array */
xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
}
@@ -1420,6 +1329,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
}
static void xen_write_cr3(unsigned long cr3)
{
+ pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+
BUG_ON(preemptible());
xen_mc_batch(); /* disables interrupts */
@@ -1430,20 +1341,14 @@ static void xen_write_cr3(unsigned long cr3)
__xen_write_cr3(true, cr3);
-#ifdef CONFIG_X86_64
- {
- pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
- if (user_pgd)
- __xen_write_cr3(false, __pa(user_pgd));
- else
- __xen_write_cr3(false, 0);
- }
-#endif
+ if (user_pgd)
+ __xen_write_cr3(false, __pa(user_pgd));
+ else
+ __xen_write_cr3(false, 0);
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
-#ifdef CONFIG_X86_64
/*
* At the start of the day - when Xen launches a guest, it has already
* built pagetables for the guest. We diligently look over them
@@ -1478,49 +1383,39 @@ static void __init xen_write_cr3_init(unsigned long cr3)
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
-#endif
static int xen_pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = mm->pgd;
- int ret = 0;
+ struct page *page = virt_to_page(pgd);
+ pgd_t *user_pgd;
+ int ret = -ENOMEM;
BUG_ON(PagePinned(virt_to_page(pgd)));
+ BUG_ON(page->private != 0);
-#ifdef CONFIG_X86_64
- {
- struct page *page = virt_to_page(pgd);
- pgd_t *user_pgd;
+ user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ page->private = (unsigned long)user_pgd;
- BUG_ON(page->private != 0);
-
- ret = -ENOMEM;
-
- user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- page->private = (unsigned long)user_pgd;
-
- if (user_pgd != NULL) {
+ if (user_pgd != NULL) {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
- user_pgd[pgd_index(VSYSCALL_ADDR)] =
- __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+ user_pgd[pgd_index(VSYSCALL_ADDR)] =
+ __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
#endif
- ret = 0;
- }
-
- BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+ ret = 0;
}
-#endif
+
+ BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+
return ret;
}
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
-#ifdef CONFIG_X86_64
pgd_t *user_pgd = xen_get_user_pgd(pgd);
if (user_pgd)
free_page((unsigned long)user_pgd);
-#endif
}
/*
@@ -1539,7 +1434,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
*/
__visible pte_t xen_make_pte_init(pteval_t pte)
{
-#ifdef CONFIG_X86_64
unsigned long pfn;
/*
@@ -1553,7 +1447,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte)
pfn >= xen_start_info->first_p2m_pfn &&
pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
pte &= ~_PAGE_RW;
-#endif
+
pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
@@ -1561,13 +1455,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
-#ifdef CONFIG_X86_32
- /* If there's an existing pte, then don't allow _PAGE_RW to be set */
- if (pte_mfn(pte) != INVALID_P2M_ENTRY
- && pte_val_ma(*ptep) & _PAGE_PRESENT)
- pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
- pte_val_ma(pte));
-#endif
__xen_set_pte(ptep, pte);
}
@@ -1642,20 +1529,14 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
if (static_branch_likely(&xen_struct_pages_ready))
SetPagePinned(page);
- if (!PageHighMem(page)) {
- xen_mc_batch();
+ xen_mc_batch();
- __set_pfn_prot(pfn, PAGE_KERNEL_RO);
+ __set_pfn_prot(pfn, PAGE_KERNEL_RO);
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
- __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+ if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
+ __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- } else {
- /* make sure there are no stray mappings of
- this page */
- kmap_flush_unused();
- }
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
}
}
@@ -1678,16 +1559,15 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
trace_xen_mmu_release_ptpage(pfn, level, pinned);
if (pinned) {
- if (!PageHighMem(page)) {
- xen_mc_batch();
+ xen_mc_batch();
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
- __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+ if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
+ __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
- __set_pfn_prot(pfn, PAGE_KERNEL);
+ __set_pfn_prot(pfn, PAGE_KERNEL);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- }
ClearPagePinned(page);
}
}
@@ -1702,7 +1582,6 @@ static void xen_release_pmd(unsigned long pfn)
xen_release_ptpage(pfn, PT_PMD);
}
-#ifdef CONFIG_X86_64
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -1712,20 +1591,6 @@ static void xen_release_pud(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PUD);
}
-#endif
-
-void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
- unsigned long top = HYPERVISOR_VIRT_START;
- struct xen_platform_parameters pp;
-
- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
- top = pp.virt_start;
-
- reserve_top_address(-top);
-#endif /* CONFIG_X86_32 */
-}
/*
* Like __va(), but returns address in the kernel mapping (which is
@@ -1733,11 +1598,7 @@ void __init xen_reserve_top(void)
*/
static void * __init __ka(phys_addr_t paddr)
{
-#ifdef CONFIG_X86_64
return (void *)(paddr + __START_KERNEL_map);
-#else
- return __va(paddr);
-#endif
}
/* Convert a machine address to physical address */
@@ -1771,56 +1632,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot)
{
return set_page_prot_flags(addr, prot, UVMF_NONE);
}
-#ifdef CONFIG_X86_32
-static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
- unsigned pmdidx, pteidx;
- unsigned ident_pte;
- unsigned long pfn;
-
- level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
- PAGE_SIZE);
-
- ident_pte = 0;
- pfn = 0;
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
- pte_t *pte_page;
-
- /* Reuse or allocate a page of ptes */
- if (pmd_present(pmd[pmdidx]))
- pte_page = m2v(pmd[pmdidx].pmd);
- else {
- /* Check for free pte pages */
- if (ident_pte == LEVEL1_IDENT_ENTRIES)
- break;
-
- pte_page = &level1_ident_pgt[ident_pte];
- ident_pte += PTRS_PER_PTE;
-
- pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
- }
-
- /* Install mappings */
- for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
- pte_t pte;
- if (pfn > max_pfn_mapped)
- max_pfn_mapped = pfn;
-
- if (!pte_none(pte_page[pteidx]))
- continue;
-
- pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
- pte_page[pteidx] = pte;
- }
- }
-
- for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
- set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
-
- set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-#endif
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
@@ -1831,13 +1643,8 @@ void __init xen_setup_machphys_mapping(void)
} else {
machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
-#ifdef CONFIG_X86_32
- WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
- < machine_to_phys_mapping);
-#endif
}
-#ifdef CONFIG_X86_64
static void __init convert_pfn_mfn(void *v)
{
pte_t *pte = v;
@@ -2168,105 +1975,6 @@ void __init xen_relocate_p2m(void)
xen_start_info->nr_p2m_frames = n_frames;
}
-#else /* !CONFIG_X86_64 */
-static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
-static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
-RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE);
-RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE);
-
-static void __init xen_write_cr3_init(unsigned long cr3)
-{
- unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
-
- BUG_ON(read_cr3_pa() != __pa(initial_page_table));
- BUG_ON(cr3 != __pa(swapper_pg_dir));
-
- /*
- * We are switching to swapper_pg_dir for the first time (from
- * initial_page_table) and therefore need to mark that page
- * read-only and then pin it.
- *
- * Xen disallows sharing of kernel PMDs for PAE
- * guests. Therefore we must copy the kernel PMD from
- * initial_page_table into a new kernel PMD to be used in
- * swapper_pg_dir.
- */
- swapper_kernel_pmd =
- extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
- copy_page(swapper_kernel_pmd, initial_kernel_pmd);
- swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
- set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
-
- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
- xen_write_cr3(cr3);
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
-
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- set_page_prot(initial_page_table, PAGE_KERNEL);
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
-
- pv_ops.mmu.write_cr3 = &xen_write_cr3;
-}
-
-/*
- * For 32 bit domains xen_start_info->pt_base is the pgd address which might be
- * not the first page table in the page table pool.
- * Iterate through the initial page tables to find the real page table base.
- */
-static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
-{
- phys_addr_t pt_base, paddr;
- unsigned pmdidx;
-
- pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
-
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
- if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
- paddr = m2p(pmd[pmdidx].pmd);
- pt_base = min(pt_base, paddr);
- }
-
- return pt_base;
-}
-
-void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
-{
- pmd_t *kernel_pmd;
-
- kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-
- xen_pt_base = xen_find_pt_base(kernel_pmd);
- xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
-
- initial_kernel_pmd =
- extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-
- max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
-
- copy_page(initial_kernel_pmd, kernel_pmd);
-
- xen_map_identity_early(initial_kernel_pmd, max_pfn);
-
- copy_page(initial_page_table, pgd);
- initial_page_table[KERNEL_PGD_BOUNDARY] =
- __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
-
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
- set_page_prot(initial_page_table, PAGE_KERNEL_RO);
- set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- xen_write_cr3(__pa(initial_page_table));
-
- memblock_reserve(xen_pt_base, xen_pt_size);
-}
-#endif /* CONFIG_X86_64 */
-
void __init xen_reserve_special_pages(void)
{
phys_addr_t paddr;
@@ -2300,12 +2008,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
switch (idx) {
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_32
- case FIX_WP_TEST:
-# ifdef CONFIG_HIGHMEM
- case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
case VSYSCALL_PAGE:
#endif
/* All local page mappings */
@@ -2357,9 +2060,7 @@ static void __init xen_post_allocator_init(void)
pv_ops.mmu.set_pte = xen_set_pte;
pv_ops.mmu.set_pmd = xen_set_pmd;
pv_ops.mmu.set_pud = xen_set_pud;
-#ifdef CONFIG_X86_64
pv_ops.mmu.set_p4d = xen_set_p4d;
-#endif
/* This will work as long as patching hasn't happened yet
(which it hasn't) */
@@ -2367,15 +2068,11 @@ static void __init xen_post_allocator_init(void)
pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
pv_ops.mmu.release_pte = xen_release_pte;
pv_ops.mmu.release_pmd = xen_release_pmd;
-#ifdef CONFIG_X86_64
pv_ops.mmu.alloc_pud = xen_alloc_pud;
pv_ops.mmu.release_pud = xen_release_pud;
-#endif
pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
-#ifdef CONFIG_X86_64
pv_ops.mmu.write_cr3 = &xen_write_cr3;
-#endif
}
static void xen_leave_lazy_mmu(void)
@@ -2420,17 +2117,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
-#ifdef CONFIG_X86_PAE
- .set_pte_atomic = xen_set_pte_atomic,
- .pte_clear = xen_pte_clear,
- .pmd_clear = xen_pmd_clear,
-#endif /* CONFIG_X86_PAE */
.set_pud = xen_set_pud_hyper,
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
-#ifdef CONFIG_X86_64
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_p4d = xen_set_p4d_hyper,
@@ -2442,7 +2133,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
#endif
-#endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 0acba2c712ab..be4151f42611 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
if (type == P2M_TYPE_PFN || i < chunk) {
/* Use initial p2m page contents. */
-#ifdef CONFIG_X86_64
mfns = alloc_p2m_page();
copy_page(mfns, xen_p2m_addr + pfn);
-#else
- mfns = xen_p2m_addr + pfn;
-#endif
ptep = populate_extra_pte((unsigned long)(p2m + pfn));
set_pte(ptep,
pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
@@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
* Allocate new pmd(s). It is checked whether the old pmd is still in place.
* If not, nothing is changed. This is okay as the only reason for allocating
* a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
- * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
+ * pmd.
*/
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3566e37241d7..7eab14d56369 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -32,7 +32,6 @@
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
-#include "vdso.h"
#include "mmu.h"
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
@@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void)
{
unsigned long limit;
-#ifdef CONFIG_X86_32
- limit = GB(64) / PAGE_SIZE;
-#else
limit = MAXMEM / PAGE_SIZE;
if (!xen_initial_domain() && xen_512gb_limit)
limit = GB(512) / PAGE_SIZE;
-#endif
+
return limit;
}
@@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void)
if (!xen_is_e820_reserved(start, size))
return;
-#ifdef CONFIG_X86_32
- /*
- * Relocating the p2m on 32 bit system to an arbitrary virtual address
- * is not supported, so just give up.
- */
- xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
- BUG();
-#else
xen_relocate_p2m();
memblock_free(start, size);
-#endif
}
/**
@@ -921,20 +908,6 @@ char * __init xen_memory_setup(void)
return "Xen";
}
-/*
- * Set the bit indicating "nosegneg" library variants should be used.
- * We only need to bother in pure 32-bit mode; compat 32-bit processes
- * can have un-truncated segments, so wrapping around is allowed.
- */
-static void __init fiddle_vdso(void)
-{
-#ifdef CONFIG_X86_32
- u32 *mask = vdso_image_32.data +
- vdso_image_32.sym_VDSO32_NOTE_MASK;
- *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-#endif
-}
-
static int register_callback(unsigned type, const void *func)
{
struct callback_register callback = {
@@ -951,11 +924,7 @@ void xen_enable_sysenter(void)
int ret;
unsigned sysenter_feature;
-#ifdef CONFIG_X86_32
- sysenter_feature = X86_FEATURE_SEP;
-#else
sysenter_feature = X86_FEATURE_SYSENTER32;
-#endif
if (!boot_cpu_has(sysenter_feature))
return;
@@ -967,7 +936,6 @@ void xen_enable_sysenter(void)
void xen_enable_syscall(void)
{
-#ifdef CONFIG_X86_64
int ret;
ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
@@ -983,7 +951,6 @@ void xen_enable_syscall(void)
if (ret != 0)
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
-#endif /* CONFIG_X86_64 */
}
static void __init xen_pvmmu_arch_setup(void)
@@ -1024,7 +991,6 @@ void __init xen_arch_setup(void)
disable_cpuidle();
disable_cpufreq();
WARN_ON(xen_set_default_idle());
- fiddle_vdso();
#ifdef CONFIG_NUMA
numa_off = 1;
#endif
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 47c8f4b444c9..c2ac319f11a4 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -211,15 +211,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
-#ifdef CONFIG_X86_32
- /*
- * Xen starts us with XEN_FLAT_RING1_DS, but linux code
- * expects __USER_DS
- */
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
-#endif
-
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
@@ -300,10 +291,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_rw(cpu);
-#ifdef CONFIG_X86_32
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/*
@@ -341,12 +328,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = task_top_of_stack(idle);
-#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
-#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
ctxt->event_callback_eip =
(unsigned long)xen_asm_exc_xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h
deleted file mode 100644
index 873c54c488fe..000000000000
--- a/arch/x86/xen/vdso.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* Bit used for the pseudo-hwcap for non-negative segments. We use
- bit 1 to avoid bugs in some versions of glibc when bit 0 is
- used; the choice is otherwise arbitrary. */
-#define VDSO_NOTE_NONEGSEG_BIT 1
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 508fe204520b..1cb0e84b9161 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -6,12 +6,18 @@
* operations here; the indirect forms are better handled in C.
*/
+#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
-#include <asm/frame.h>
+#include <asm/segment.h>
+#include <asm/thread_info.h>
#include <asm/asm.h>
+#include <asm/frame.h>
+#include <xen/interface/xen.h>
+
+#include <linux/init.h>
#include <linux/linkage.h>
/*
@@ -76,11 +82,7 @@ SYM_FUNC_END(xen_save_fl_direct)
*/
SYM_FUNC_START(xen_restore_fl_direct)
FRAME_BEGIN
-#ifdef CONFIG_X86_64
testw $X86_EFLAGS_IF, %di
-#else
- testb $X86_EFLAGS_IF>>8, %ah
-#endif
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/*
* Preempt here doesn't matter because that will deal with any
@@ -104,15 +106,6 @@ SYM_FUNC_END(xen_restore_fl_direct)
*/
SYM_FUNC_START(check_events)
FRAME_BEGIN
-#ifdef CONFIG_X86_32
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
-#else
push %rax
push %rcx
push %rdx
@@ -132,7 +125,6 @@ SYM_FUNC_START(check_events)
pop %rdx
pop %rcx
pop %rax
-#endif
FRAME_END
ret
SYM_FUNC_END(check_events)
@@ -151,3 +143,175 @@ SYM_FUNC_START(xen_read_cr2_direct)
FRAME_END
ret
SYM_FUNC_END(xen_read_cr2_direct);
+
+.macro xen_pv_trap name
+SYM_CODE_START(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+SYM_CODE_END(xen_\name)
+_ASM_NOKPROBE(xen_\name)
+.endm
+
+xen_pv_trap asm_exc_divide_error
+xen_pv_trap asm_xenpv_exc_debug
+xen_pv_trap asm_exc_int3
+xen_pv_trap asm_xenpv_exc_nmi
+xen_pv_trap asm_exc_overflow
+xen_pv_trap asm_exc_bounds
+xen_pv_trap asm_exc_invalid_op
+xen_pv_trap asm_exc_device_not_available
+xen_pv_trap asm_exc_double_fault
+xen_pv_trap asm_exc_coproc_segment_overrun
+xen_pv_trap asm_exc_invalid_tss
+xen_pv_trap asm_exc_segment_not_present
+xen_pv_trap asm_exc_stack_segment
+xen_pv_trap asm_exc_general_protection
+xen_pv_trap asm_exc_page_fault
+xen_pv_trap asm_exc_spurious_interrupt_bug
+xen_pv_trap asm_exc_coprocessor_error
+xen_pv_trap asm_exc_alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap asm_exc_machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap asm_exc_simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+xen_pv_trap asm_exc_xen_hypervisor_callback
+
+ __INIT
+SYM_CODE_START(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+ i = i + 1
+ .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+ .endr
+SYM_CODE_END(xen_early_idt_handler_array)
+ __FINIT
+
+hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
+/*
+ * Xen64 iret frame:
+ *
+ * ss
+ * rsp
+ * rflags
+ * cs
+ * rip <-- standard iret frame
+ *
+ * flags
+ *
+ * rcx }
+ * r11 }<-- pushed by hypercall page
+ * rsp->rax }
+ */
+SYM_CODE_START(xen_iret)
+ pushq $0
+ jmp hypercall_iret
+SYM_CODE_END(xen_iret)
+
+SYM_CODE_START(xen_sysret64)
+ /*
+ * We're already on the usermode stack at this point, but
+ * still with the kernel gs, so we can easily switch back.
+ *
+ * tss.sp2 is scratch space.
+ */
+ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ pushq $__USER_DS
+ pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+ pushq %r11
+ pushq $__USER_CS
+ pushq %rcx
+
+ pushq $VGCF_in_syscall
+ jmp hypercall_iret
+SYM_CODE_END(xen_sysret64)
+
+/*
+ * Xen handles syscall callbacks much like ordinary exceptions, which
+ * means we have:
+ * - kernel gs
+ * - kernel rsp
+ * - an iret-like stack frame on the stack (including rcx and r11):
+ * ss
+ * rsp
+ * rflags
+ * cs
+ * rip
+ * r11
+ * rsp->rcx
+ */
+
+/* Normal 64-bit system call target */
+SYM_FUNC_START(xen_syscall_target)
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER_DS and __USER_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER_DS, 4*8(%rsp)
+ movq $__USER_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_64_after_hwframe
+SYM_FUNC_END(xen_syscall_target)
+
+#ifdef CONFIG_IA32_EMULATION
+
+/* 32-bit compat syscall target */
+SYM_FUNC_START(xen_syscall32_target)
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_compat_after_hwframe
+SYM_FUNC_END(xen_syscall32_target)
+
+/* 32-bit compat sysenter target */
+SYM_FUNC_START(xen_sysenter_target)
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+ * that we don't need to guard against single step exceptions here.
+ */
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSENTER_compat_after_hwframe
+SYM_FUNC_END(xen_sysenter_target)
+
+#else /* !CONFIG_IA32_EMULATION */
+
+SYM_FUNC_START_ALIAS(xen_syscall32_target)
+SYM_FUNC_START(xen_sysenter_target)
+ lea 16(%rsp), %rsp /* strip %rcx, %r11 */
+ mov $-ENOSYS, %rax
+ pushq $0
+ jmp hypercall_iret
+SYM_FUNC_END(xen_sysenter_target)
+SYM_FUNC_END_ALIAS(xen_syscall32_target)
+
+#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
deleted file mode 100644
index 4757cec33abe..000000000000
--- a/arch/x86/xen/xen-asm_32.S
+++ /dev/null
@@ -1,185 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Asm versions of Xen pv-ops, suitable for direct use.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C.
- */
-
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-#include <asm/asm.h>
-
-#include <xen/interface/xen.h>
-
-#include <linux/linkage.h>
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-/*
- * This is run where a normal iret would be run, with the same stack setup:
- * 8: eflags
- * 4: cs
- * esp-> 0: eip
- *
- * This attempts to make sure that any pending events are dealt with
- * on return to usermode, but there is a small window in which an
- * event can happen just before entering usermode. If the nested
- * interrupt ends up setting one of the TIF_WORK_MASK pending work
- * flags, they will not be tested again before returning to
- * usermode. This means that a process can end up with pending work,
- * which will be unprocessed until the process enters and leaves the
- * kernel again, which could be an unbounded amount of time. This
- * means that a pending signal or reschedule event could be
- * indefinitely delayed.
- *
- * The fix is to notice a nested interrupt in the critical window, and
- * if one occurs, then fold the nested interrupt into the current
- * interrupt stack frame, and re-process it iteratively rather than
- * recursively. This means that it will exit via the normal path, and
- * all pending work will be dealt with appropriately.
- *
- * Because the nested interrupt handler needs to deal with the current
- * stack state in whatever form its in, we keep things simple by only
- * using a single register which is pushed/popped on the stack.
- */
-
-.macro POP_FS
-1:
- popw %fs
-.pushsection .fixup, "ax"
-2: movw $0, (%esp)
- jmp 1b
-.popsection
- _ASM_EXTABLE(1b,2b)
-.endm
-
-SYM_CODE_START(xen_iret)
- /* test eflags for special cases */
- testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
- jnz hyper_iret
-
- push %eax
- ESP_OFFSET=4 # bytes pushed onto stack
-
- /* Store vcpu_info pointer for easy access */
-#ifdef CONFIG_SMP
- pushw %fs
- movl $(__KERNEL_PERCPU), %eax
- movl %eax, %fs
- movl %fs:xen_vcpu, %eax
- POP_FS
-#else
- movl %ss:xen_vcpu, %eax
-#endif
-
- /* check IF state we're restoring */
- testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
- /*
- * Maybe enable events. Once this happens we could get a
- * recursive event, so the critical region starts immediately
- * afterwards. However, if that happens we don't end up
- * resuming the code, so we don't have to be worried about
- * being preempted to another CPU.
- */
- setz %ss:XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
- /* check for unmasked and pending */
- cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
-
- /*
- * If there's something pending, mask events again so we can
- * jump back into exc_xen_hypervisor_callback. Otherwise do not
- * touch XEN_vcpu_info_mask.
- */
- jne 1f
- movb $1, %ss:XEN_vcpu_info_mask(%eax)
-
-1: popl %eax
-
- /*
- * From this point on the registers are restored and the stack
- * updated, so we don't need to worry about it if we're
- * preempted
- */
-iret_restore_end:
-
- /*
- * Jump to hypervisor_callback after fixing up the stack.
- * Events are masked, so jumping out of the critical region is
- * OK.
- */
- je xen_asm_exc_xen_hypervisor_callback
-
-1: iret
-xen_iret_end_crit:
- _ASM_EXTABLE(1b, asm_iret_error)
-
-hyper_iret:
- /* put this out of line since its very rarely used */
- jmp hypercall_page + __HYPERVISOR_iret * 32
-SYM_CODE_END(xen_iret)
-
- .globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees
- * that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit.
- *
- * The stack format at this point is:
- * ----------------
- * ss : (ss/esp may be present if we came from usermode)
- * esp :
- * eflags } outer exception info
- * cs }
- * eip }
- * ----------------
- * eax : outer eax if it hasn't been restored
- * ----------------
- * eflags }
- * cs } nested exception info
- * eip }
- * return address : (into xen_asm_exc_xen_hypervisor_callback)
- *
- * In order to deliver the nested exception properly, we need to discard the
- * nested exception frame such that when we handle the exception, we do it
- * in the context of the outer exception rather than starting a new one.
- *
- * The only caveat is that if the outer eax hasn't been restored yet (i.e.
- * it's still on stack), we need to restore its value here.
-*/
-.pushsection .noinstr.text, "ax"
-SYM_CODE_START(xen_iret_crit_fixup)
- /*
- * Paranoia: Make sure we're really coming from kernel space.
- * One could imagine a case where userspace jumps into the
- * critical range address, but just before the CPU delivers a
- * PF, it decides to deliver an interrupt instead. Unlikely?
- * Definitely. Easy to avoid? Yes.
- */
- testb $2, 2*4(%esp) /* nested CS */
- jnz 2f
-
- /*
- * If eip is before iret_restore_end then stack
- * hasn't been restored yet.
- */
- cmpl $iret_restore_end, 1*4(%esp)
- jae 1f
-
- movl 4*4(%esp), %eax /* load outer EAX */
- ret $4*4 /* discard nested EIP, CS, and EFLAGS as
- * well as the just restored EAX */
-
-1:
- ret $3*4 /* discard nested EIP, CS, and EFLAGS */
-
-2:
- ret
-SYM_CODE_END(xen_iret_crit_fixup)
-.popsection
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
deleted file mode 100644
index aab1d99b2b48..000000000000
--- a/arch/x86/xen/xen-asm_64.S
+++ /dev/null
@@ -1,192 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Asm versions of Xen pv-ops, suitable for direct use.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C.
- */
-
-#include <asm/errno.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-#include <xen/interface/xen.h>
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-
-.macro xen_pv_trap name
-SYM_CODE_START(xen_\name)
- pop %rcx
- pop %r11
- jmp \name
-SYM_CODE_END(xen_\name)
-_ASM_NOKPROBE(xen_\name)
-.endm
-
-xen_pv_trap asm_exc_divide_error
-xen_pv_trap asm_xenpv_exc_debug
-xen_pv_trap asm_exc_int3
-xen_pv_trap asm_xenpv_exc_nmi
-xen_pv_trap asm_exc_overflow
-xen_pv_trap asm_exc_bounds
-xen_pv_trap asm_exc_invalid_op
-xen_pv_trap asm_exc_device_not_available
-xen_pv_trap asm_exc_double_fault
-xen_pv_trap asm_exc_coproc_segment_overrun
-xen_pv_trap asm_exc_invalid_tss
-xen_pv_trap asm_exc_segment_not_present
-xen_pv_trap asm_exc_stack_segment
-xen_pv_trap asm_exc_general_protection
-xen_pv_trap asm_exc_page_fault
-xen_pv_trap asm_exc_spurious_interrupt_bug
-xen_pv_trap asm_exc_coprocessor_error
-xen_pv_trap asm_exc_alignment_check
-#ifdef CONFIG_X86_MCE
-xen_pv_trap asm_exc_machine_check
-#endif /* CONFIG_X86_MCE */
-xen_pv_trap asm_exc_simd_coprocessor_error
-#ifdef CONFIG_IA32_EMULATION
-xen_pv_trap entry_INT80_compat
-#endif
-xen_pv_trap asm_exc_xen_hypervisor_callback
-
- __INIT
-SYM_CODE_START(xen_early_idt_handler_array)
- i = 0
- .rept NUM_EXCEPTION_VECTORS
- pop %rcx
- pop %r11
- jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
- i = i + 1
- .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
- .endr
-SYM_CODE_END(xen_early_idt_handler_array)
- __FINIT
-
-hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
-/*
- * Xen64 iret frame:
- *
- * ss
- * rsp
- * rflags
- * cs
- * rip <-- standard iret frame
- *
- * flags
- *
- * rcx }
- * r11 }<-- pushed by hypercall page
- * rsp->rax }
- */
-SYM_CODE_START(xen_iret)
- pushq $0
- jmp hypercall_iret
-SYM_CODE_END(xen_iret)
-
-SYM_CODE_START(xen_sysret64)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back.
- *
- * tss.sp2 is scratch space.
- */
- movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- pushq $__USER_DS
- pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
- pushq %r11
- pushq $__USER_CS
- pushq %rcx
-
- pushq $VGCF_in_syscall
- jmp hypercall_iret
-SYM_CODE_END(xen_sysret64)
-
-/*
- * Xen handles syscall callbacks much like ordinary exceptions, which
- * means we have:
- * - kernel gs
- * - kernel rsp
- * - an iret-like stack frame on the stack (including rcx and r11):
- * ss
- * rsp
- * rflags
- * cs
- * rip
- * r11
- * rsp->rcx
- */
-
-/* Normal 64-bit system call target */
-SYM_FUNC_START(xen_syscall_target)
- popq %rcx
- popq %r11
-
- /*
- * Neither Xen nor the kernel really knows what the old SS and
- * CS were. The kernel expects __USER_DS and __USER_CS, so
- * report those values even though Xen will guess its own values.
- */
- movq $__USER_DS, 4*8(%rsp)
- movq $__USER_CS, 1*8(%rsp)
-
- jmp entry_SYSCALL_64_after_hwframe
-SYM_FUNC_END(xen_syscall_target)
-
-#ifdef CONFIG_IA32_EMULATION
-
-/* 32-bit compat syscall target */
-SYM_FUNC_START(xen_syscall32_target)
- popq %rcx
- popq %r11
-
- /*
- * Neither Xen nor the kernel really knows what the old SS and
- * CS were. The kernel expects __USER32_DS and __USER32_CS, so
- * report those values even though Xen will guess its own values.
- */
- movq $__USER32_DS, 4*8(%rsp)
- movq $__USER32_CS, 1*8(%rsp)
-
- jmp entry_SYSCALL_compat_after_hwframe
-SYM_FUNC_END(xen_syscall32_target)
-
-/* 32-bit compat sysenter target */
-SYM_FUNC_START(xen_sysenter_target)
- /*
- * NB: Xen is polite and clears TF from EFLAGS for us. This means
- * that we don't need to guard against single step exceptions here.
- */
- popq %rcx
- popq %r11
-
- /*
- * Neither Xen nor the kernel really knows what the old SS and
- * CS were. The kernel expects __USER32_DS and __USER32_CS, so
- * report those values even though Xen will guess its own values.
- */
- movq $__USER32_DS, 4*8(%rsp)
- movq $__USER32_CS, 1*8(%rsp)
-
- jmp entry_SYSENTER_compat_after_hwframe
-SYM_FUNC_END(xen_sysenter_target)
-
-#else /* !CONFIG_IA32_EMULATION */
-
-SYM_FUNC_START_ALIAS(xen_syscall32_target)
-SYM_FUNC_START(xen_sysenter_target)
- lea 16(%rsp), %rsp /* strip %rcx, %r11 */
- mov $-ENOSYS, %rax
- pushq $0
- jmp hypercall_iret
-SYM_FUNC_END(xen_sysenter_target)
-SYM_FUNC_END_ALIAS(xen_syscall32_target)
-
-#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1ba601df3a37..2d7c8f34f56c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen)
rep __ASM_SIZE(stos)
mov %_ASM_SI, xen_start_info
-#ifdef CONFIG_X86_64
mov initial_stack(%rip), %rsp
-#else
- mov initial_stack, %esp
-#endif
-#ifdef CONFIG_X86_64
/* Set up %gs.
*
* The base of %gs always points to fixed_percpu_data. If the
@@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen)
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
-#endif
call xen_start_kernel
SYM_CODE_END(startup_xen)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 53b224fd6177..45d556f71858 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_reserve_top(void);
void __init xen_reserve_special_pages(void);
void __init xen_pt_check_e820(void);
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index e57f0d0a88d8..b9758119feca 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -35,7 +35,7 @@
#define get_fs() (current->thread.current_ds)
#define set_fs(val) (current->thread.current_ds = (val))
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define __kernel_ok (uaccess_kernel())
#define __user_ok(addr, size) \
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index d216ccba42f7..6276e3c2d3fc 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -222,7 +222,7 @@
204 common quotactl sys_quotactl
# 205 was old nfsservctl
205 common nfsservctl sys_ni_syscall
-206 common _sysctl sys_sysctl
+206 common _sysctl sys_ni_syscall
207 common bdflush sys_bdflush
208 common uname sys_newuname
209 common sysinfo sys_sysinfo
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index c128dcc7c85b..7666408ce12a 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
retry:
mmap_read_lock(mm);
vma = find_vma(mm, address);
@@ -107,7 +110,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs))
return;
@@ -122,10 +125,6 @@ good_area:
BUG();
}
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
@@ -139,12 +138,6 @@ good_area:
}
mmap_read_unlock(mm);
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (flags & VM_FAULT_MAJOR)
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
- else
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-
return;
/* Something tried to access memory that isn't in our memory map..