author     Linus Torvalds   2022-01-14 15:17:26 +0100
committer  Linus Torvalds   2022-01-14 15:17:26 +0100
commit     29ec39fcf11e4583eb8d5174f756ea109c77cc44
tree       656f5c7166efe176ab2c7e24042f4e38a86b4473 /arch/powerpc
parent     3fb561b1e0bf4c75bc5f4d799845b08fa5ab3853
parent     f1aa0e47c29268776205698f2453dc07fab49855
Merge tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Optimise radix KVM guest entry/exit by 2x on Power9/Power10.
- Allow firmware to tell us whether to disable the entry and uaccess
flushes on Power10 or later CPUs.
- Add BPF_PROBE_MEM support for 32 and 64-bit BPF jits.
- Several fixes and improvements to our hard lockup watchdog.
- Activate HAVE_DYNAMIC_FTRACE_WITH_REGS on 32-bit.
- Allow building the 64-bit Book3S kernel without hash MMU support, i.e.
Radix only.
- Add KUAP (SMAP) support for 40x, 44x, 8xx, Book3E (64-bit).
- Add new encodings for perf_mem_data_src.mem_hops field, and use them
on Power10.
- A series of small performance improvements to 64-bit interrupt entry.
- Several commits fixing issues when building with the clang integrated
assembler.
- Many other small features and fixes.
Thanks to Alan Modra, Alexey Kardashevskiy, Ammar Faizi, Anders Roxell,
Arnd Bergmann, Athira Rajeev, Cédric Le Goater, Christophe JAILLET,
Christophe Leroy, Christoph Hellwig, Daniel Axtens, David Yang, Erhard
Furtner, Fabiano Rosas, Greg Kroah-Hartman, Guo Ren, Hari Bathini, Jason
Wang, Joel Stanley, Julia Lawall, Kajol Jain, Kees Cook, Laurent Dufour,
Madhavan Srinivasan, Mark Brown, Minghao Chi, Nageswara R Sastry, Naresh
Kamboju, Nathan Chancellor, Nathan Lynch, Nicholas Piggin, Nick Child,
Oliver O'Halloran, Peiwei Hu, Randy Dunlap, Ravi Bangoria, Rob Herring,
Russell Currey, Sachin Sant, Sean Christopherson, Segher Boessenkool,
Thadeu Lima de Souza Cascardo, Tyrel Datwyler, Xiang wangx, and Yang
Guang.
* tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (240 commits)
powerpc/xmon: Dump XIVE information for online-only processors.
powerpc/opal: use default_groups in kobj_type
powerpc/cacheinfo: use default_groups in kobj_type
powerpc/sched: Remove unused TASK_SIZE_OF
powerpc/xive: Add missing null check after calling kmalloc
powerpc/floppy: Remove usage of the deprecated "pci-dma-compat.h" API
selftests/powerpc: Add a test of sigreturning to an unaligned address
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings
powerpc/64s: Mask NIP before checking against SRR0
powerpc/perf: Fix spelling of "its"
powerpc/32: Fix boot failure with GCC latent entropy plugin
powerpc/code-patching: Replace patch_instruction() by ppc_inst_write() in selftests
powerpc/code-patching: Move code patching selftests in its own file
powerpc/code-patching: Move instr_is_branch_{i/b}form() in code-patching.h
powerpc/code-patching: Move patch_exception() outside code-patching.c
powerpc/code-patching: Use test_trampoline for prefixed patch test
powerpc/code-patching: Fix patch_branch() return on out-of-range failure
powerpc/code-patching: Reorganise do_patch_instruction() to ease error handling
powerpc/code-patching: Fix unmap_patch_area() error handling
powerpc/code-patching: Fix error handling in do_patch_instruction()
...
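Several of the commits listed above rework the powerpc code-patching API, and the code-patching.h hunk in the diff below changes its prototypes from struct ppc_inst to the ppc_inst_t typedef. As a quick orientation, here is a minimal sketch of a call site written against those post-series prototypes; it is not part of the series, and patch_nop_example/some_insn_addr are made-up names.

#include <asm/code-patching.h>
#include <asm/ppc-opcode.h>

/*
 * Hypothetical caller of the code-patching API, assuming the prototypes
 * shown in the arch/powerpc/include/asm/code-patching.h hunk below:
 * int patch_instruction(u32 *addr, ppc_inst_t instr).
 */
static int patch_nop_example(u32 *some_insn_addr)
{
	/* Overwrite the instruction at some_insn_addr with a nop. */
	return patch_instruction(some_insn_addr, ppc_inst(PPC_RAW_NOP()));
}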
Diffstat (limited to 'arch/powerpc')
355 files changed, 5408 insertions, 3419 deletions
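As a reading aid before the diff itself: the reworked ATOMIC_OP() macro in the arch/powerpc/include/asm/atomic.h hunk below expands, for the add case, to roughly the inline asm sketched here. This is an illustrative expansion, not verbatim kernel code. The "rI" constraint together with the %I2 output modifier lets the compiler pick either the register form (addc) or the 16-bit signed immediate form (addic), which is why "xer" joins the clobber list: the carrying variants update XER[CA].

/* Approximate expansion of ATOMIC_OPS(add, add, "c", I, "xer") from the
 * atomic.h hunk below; in the kernel this body is generated by the macro,
 * and kernel build context (atomic_t) is assumed. */
static inline void arch_atomic_add(int a, atomic_t *v)
{
	int t;

	__asm__ __volatile__(
"1:	lwarx	%0,0,%3		# atomic_add\n"
"	add%I2c	%0,%0,%2\n"
"	stwcx.	%0,0,%3\n"
"	bne-	1b\n"
	: "=&r" (t), "+m" (v->counter)
	: "rI" (a), "r" (&v->counter)
	: "cc", "xer");
}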
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index dea74d7717c0..0631c9241af3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,7 +129,7 @@ config PPC select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE - select ARCH_HAS_MEMREMAP_COMPAT_ALIGN + select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PHYS_TO_DMA @@ -165,6 +165,7 @@ config PPC select BINFMT_ELF select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS + select CPUMASK_OFFSTACK if NR_CPUS >= 8192 select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DMA_OPS_BYPASS if PPC64 select DMA_OPS if PPC64 @@ -205,7 +206,7 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP @@ -229,7 +230,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES @@ -845,7 +846,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool "Support setting protections for 4k subpages (subpage_prot syscall)" default n - depends on PPC_BOOK3S_64 && PPC_64K_PAGES + depends on PPC_64S_HASH_MMU && PPC_64K_PAGES help This option adds support for system call to allow user programs to set access permissions (read/write, readonly, or no access) @@ -943,6 +944,7 @@ config PPC_MEM_KEYS prompt "PowerPC Memory Protection Keys" def_bool y depends on PPC_BOOK3S_64 + depends on PPC_64S_HASH_MMU select ARCH_USES_HIGH_VMA_FLAGS select ARCH_HAS_PKEYS help diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index e02568f17334..5f16ac1583c5 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -245,7 +245,9 @@ cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 # mnemonic and failing that it will allow any valid mnemonic that GAS knows # about. GCC will pass -many to GAS when assembling, clang does not. -cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many +# LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675 +# but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway... +cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many) cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) KBUILD_AFLAGS += $(cpu-as-y) @@ -445,10 +447,11 @@ PHONY += checkbin # Check toolchain versions: # - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: - @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ - && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ + @if test "x${CONFIG_LD_IS_LLD}" != "xy" -a \ + "x$(call ld-ifversion, -le, 22400, y)" = "xy" ; then \ echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ echo 'in some circumstances.' ; \ + echo '*** binutils 2.23 do not define the TOC symbol ' ; \ echo -n '*** Please use a different binutils version.' 
; \ false ; \ fi diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 1d83966f5ef6..feadee18e271 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -28,7 +28,7 @@ p_etext: .8byte _etext p_bss_start: .8byte __bss_start p_end: .8byte _end -p_toc: .8byte __toc_start + 0x8000 - p_base +p_toc: .8byte .TOC. - p_base p_dyn: .8byte __dynamic_start - p_base p_rela: .8byte __rela_dyn_start - p_base p_prom: .8byte 0 @@ -226,16 +226,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ #ifdef __powerpc64__ #define PROM_FRAME_SIZE 512 -#define SAVE_GPR(n, base) std n,8*(n)(base) -#define REST_GPR(n, base) ld n,8*(n)(base) -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) + +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg,\offset+\width*.Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0 +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) /* prom handles the jump into and return from firmware. The prom args pointer is loaded in r3. */ @@ -246,9 +249,7 @@ prom: stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(13, 31, r1) mfcr r10 std r10,8*32(r1) mfmsr r10 @@ -283,9 +284,7 @@ prom: /* Restore other registers */ REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(13, 31, r1) ld r10,8*32(r1) mtcr r10 diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 57024a4c1e7d..dfaf974c0ce6 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -25,14 +25,6 @@ status = "disabled"; }; - spi@f00 { - msp430@0 { - compatible = "spidev"; - spi-max-frequency = <32000>; - reg = <0>; - }; - }; - psc@2000 { // PSC1 status = "disabled"; }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index b55a9e5bd828..7e52509fa506 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -34,12 +34,6 @@ #address-cells = <1>; #size-cells = <0>; cell-index = <0>; - - spidev@0 { - compatible = "spidev"; - spi-max-frequency = <250000>; - reg = <0>; - }; }; psc@2200 { // PSC2 diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index d6f072865627..d65cd55a6f38 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -36,12 +36,9 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER - . 
= ALIGN(256); - .got : + .got : ALIGN(256) { - __toc_start = .; - *(.got) - *(.toc) + *(.got .toc) } #endif diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index 07d87a4044b2..eff933ebbb9e 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -15,6 +15,8 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PPC64=y +CONFIG_POWER9_CPU=y +# CONFIG_PPC_64S_HASH_MMU is not set # CONFIG_PPC_KUEP is not set # CONFIG_PPC_KUAP is not set CONFIG_CPU_LITTLE_ENDIAN=y @@ -27,7 +29,6 @@ CONFIG_PPC_MICROWATT=y CONFIG_CPU_FREQ=y CONFIG_HZ_100=y CONFIG_PPC_4K_PAGES=y -# CONFIG_PPC_MEM_KEYS is not set # CONFIG_SECCOMP is not set # CONFIG_MQ_IOSCHED_KYBER is not set # CONFIG_COREDUMP is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 203d0b7f0bb8..c8b0e80d613b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -26,7 +26,6 @@ CONFIG_PPC64=y CONFIG_NR_CPUS=2048 CONFIG_PPC_SPLPAR=y CONFIG_DTL=y -CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PPC_SVM=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index e64f2242abe1..b571d084c148 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -38,7 +38,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_SPLPAR=y CONFIG_DTL=y -CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PAPR_SCM=m diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S index 948d100a2934..fa6bc440cf4a 100644 --- a/arch/powerpc/crypto/md5-asm.S +++ b/arch/powerpc/crypto/md5-asm.S @@ -38,15 +38,11 @@ #define INITIALIZE \ PPC_STLU r1,-INT_FRAME_SIZE(r1); \ - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ - SAVE_4GPRS(22, r1); \ - SAVE_GPR(26, r1) + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ #define FINALIZE \ - REST_8GPRS(14, r1); /* pop registers from stack */ \ - REST_4GPRS(22, r1); \ - REST_GPR(26, r1); \ - addi r1,r1,INT_FRAME_SIZE; + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE #ifdef __BIG_ENDIAN__ #define LOAD_DATA(reg, off) \ diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index 23e248beff71..f0d5ed557ab1 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -125,8 +125,7 @@ _GLOBAL(powerpc_sha_transform) PPC_STLU r1,-INT_FRAME_SIZE(r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(14, 31, r1) /* Load up A - E */ lwz RA(0),0(r3) /* A */ @@ -184,7 +183,6 @@ _GLOBAL(powerpc_sha_transform) stw RD(0),12(r3) stw RE(0),16(r3) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(14, 31, r1) addi r1,r1,INT_FRAME_SIZE blr diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 222823861a67..41b8a1e1144a 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void kvmhv_save_host_pmu(void); -void kvmhv_load_host_pmu(void); -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use); -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); - void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu); long 
kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr); diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index fd594fdbd84d..853dc86864f4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i) __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); } -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OP(op, asm_op, suffix, sign, ...) \ static __inline__ void arch_atomic_##op(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "\n" \ - #asm_op " %0,%2,%0\n" \ + #asm_op "%I2" suffix " %0,%0,%2\n" \ " stwcx. %0,0,%3 \n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ } \ -#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ +#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \ static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ - #asm_op " %0,%2,%0\n" \ + #asm_op "%I2" suffix " %0,%0,%2\n" \ " stwcx. %0,0,%3\n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ \ return t; \ } -#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \ static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ { \ int res, t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ - #asm_op " %1,%3,%0\n" \ + #asm_op "%I3" suffix " %1,%0,%3\n" \ " stwcx. %1,0,%4\n" \ " bne- 1b\n" \ : "=&r" (res), "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ \ return res; \ } -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ - ATOMIC_FETCH_OP_RELAXED(op, asm_op) +#define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \ + ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \ + ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\ + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__) -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, subf) +ATOMIC_OPS(add, add, "c", I, "xer") +ATOMIC_OPS(sub, sub, "c", I, "xer") #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed @@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf) #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_FETCH_OP_RELAXED(op, asm_op) +#define ATOMIC_OPS(op, asm_op, suffix, sign) \ + ATOMIC_OP(op, asm_op, suffix, sign) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign) -ATOMIC_OPS(and, and) -ATOMIC_OPS(or, or) -ATOMIC_OPS(xor, xor) +ATOMIC_OPS(and, and, ".", K) +ATOMIC_OPS(or, or, "", K) +ATOMIC_OPS(xor, xor, "", K) #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed @@ -118,71 +118,6 @@ ATOMIC_OPS(xor, xor) #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP -static __inline__ void arch_atomic_inc(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_inc\n\ - addic %0,%0,1\n" -" stwcx. 
%0,0,%2 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); -} -#define arch_atomic_inc arch_atomic_inc - -static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" -" addic %0,%0,1\n" -" stwcx. %0,0,%2\n" -" bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); - - return t; -} - -static __inline__ void arch_atomic_dec(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_dec\n\ - addic %0,%0,-1\n" -" stwcx. %0,0,%2\n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); -} -#define arch_atomic_dec arch_atomic_dec - -static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" -" addic %0,%0,-1\n" -" stwcx. %0,0,%2\n" -" bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); - - return t; -} - -#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed -#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed - #define arch_atomic_cmpxchg(v, o, n) \ (arch_cmpxchg(&((v)->counter), (o), (n))) #define arch_atomic_cmpxchg_relaxed(v, o, n) \ @@ -241,50 +176,20 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) "1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\ cmpw 0,%0,%3 \n\ beq 2f \n\ - add %0,%2,%0 \n" + add%I2c %0,%0,%2 \n" " stwcx. %0,0,%1 \n\ bne- 1b \n" PPC_ATOMIC_EXIT_BARRIER -" subf %0,%2,%0 \n\ +" sub%I2c %0,%0,%2 \n\ 2:" : "=&r" (t) - : "r" (&v->counter), "r" (a), "r" (u) - : "cc", "memory"); + : "r" (&v->counter), "rI" (a), "r" (u) + : "cc", "memory", "xer"); return t; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. - */ -static __inline__ int arch_atomic_inc_not_zero(atomic_t *v) -{ - int t1, t2; - - __asm__ __volatile__ ( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\ - cmpwi 0,%0,0\n\ - beq- 2f\n\ - addic %1,%0,1\n" -" stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ATOMIC_EXIT_BARRIER - "\n\ -2:" - : "=&r" (t1), "=&r" (t2) - : "r" (&v->counter) - : "cc", "xer", "memory"); - - return t1; -} -#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v)) - /* * Atomically test *v and decrement if it is greater than 0. 
* The function returns the old value of *v minus 1, even if diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 11847b6a244e..a05d8c62cbea 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -71,19 +71,61 @@ static inline void fn(unsigned long mask, \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX "%0,0,%3,0\n" \ - stringify_in_c(op) "%0,%0,%2\n" \ + #op "%I2 %0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ } DEFINE_BITOP(set_bits, or, "") -DEFINE_BITOP(clear_bits, andc, "") -DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER) DEFINE_BITOP(change_bits, xor, "") +static __always_inline bool is_rlwinm_mask_valid(unsigned long x) +{ + if (!x) + return false; + if (x & 1) + x = ~x; // make the mask non-wrapping + x += x & -x; // adding the low set bit results in at most one bit set + + return !(x & (x - 1)); +} + +#define DEFINE_CLROP(fn, prefix) \ +static inline void fn(unsigned long mask, volatile unsigned long *_p) \ +{ \ + unsigned long old; \ + unsigned long *p = (unsigned long *)_p; \ + \ + if (IS_ENABLED(CONFIG_PPC32) && \ + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\ + asm volatile ( \ + prefix \ + "1:" "lwarx %0,0,%3\n" \ + "rlwinm %0,%0,0,%2\n" \ + "stwcx. %0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "n" (~mask), "r" (p) \ + : "cc", "memory"); \ + } else { \ + asm volatile ( \ + prefix \ + "1:" PPC_LLARX "%0,0,%3,0\n" \ + "andc %0,%0,%2\n" \ + PPC_STLCX "%0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "r" (mask), "r" (p) \ + : "cc", "memory"); \ + } \ +} + +DEFINE_CLROP(clear_bits, "") +DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER) + static inline void arch_set_bit(int nr, volatile unsigned long *addr) { set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)); @@ -116,12 +158,12 @@ static inline unsigned long fn( \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX "%0,0,%3,%4\n" \ - stringify_in_c(op) "%1,%0,%2\n" \ + #op "%I2 %1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ + : "rK" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ : "cc", "memory"); \ return (old & mask); \ } @@ -130,11 +172,42 @@ DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_set_bits_lock, or, "", PPC_ACQUIRE_BARRIER, 1) -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, - PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p) +{ + unsigned long old, t; + unsigned long *p = (unsigned long *)_p; + + if (IS_ENABLED(CONFIG_PPC32) && + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" "lwarx %0,0,%3\n" + "rlwinm %1,%0,0,%2\n" + "stwcx. 
%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "n" (~mask), "r" (p) + : "cc", "memory"); + } else { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX "%0,0,%3,0\n" + "andc %1,%0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + } + + return (old & mask); +} + static inline int arch_test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 9f38040f0641..678f9c9d89b6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -12,50 +12,10 @@ #include <linux/jump_label.h> extern struct static_key_false disable_kuap_key; -extern struct static_key_false disable_kuep_key; - -static __always_inline bool kuap_is_disabled(void) -{ - return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); -} static __always_inline bool kuep_is_disabled(void) { - return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); -} - -static inline void kuep_lock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) | SR_NX); - /* - * This isync() shouldn't be necessary as the kernel is not excepted to - * run any instruction in userspace soon after the update of segments, - * but hash based cores (at least G3) seem to exhibit a random - * behaviour when the 'isync' is not there. 603 cores don't have this - * behaviour so don't do the 'isync' as it saves several CPU cycles. - */ - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - isync(); /* Context sync required after mtsr() */ -} - -static inline void kuep_unlock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) & ~SR_NX); - /* - * This isync() shouldn't be necessary as a 'rfi' will soon be executed - * to return to userspace, but hash based cores (at least G3) seem to - * exhibit a random behaviour when the 'isync' is not there. 603 cores - * don't have this behaviour so don't do the 'isync' as it saves several - * CPU cycles. 
- */ - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - isync(); /* Context sync required after mtsr() */ + return !IS_ENABLED(CONFIG_PPC_KUEP); } #ifdef CONFIG_PPC_KUAP @@ -65,6 +25,11 @@ static inline void kuep_unlock(void) #define KUAP_NONE (~0UL) #define KUAP_ALL (~1UL) +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + static inline void kuap_lock_one(unsigned long addr) { mtsr(mfsr(addr) | SR_KS, addr); @@ -92,7 +57,7 @@ static inline void kuap_unlock_all(void) void kuap_lock_all_ool(void); void kuap_unlock_all_ool(void); -static inline void kuap_lock(unsigned long addr, bool ool) +static inline void kuap_lock_addr(unsigned long addr, bool ool) { if (likely(addr != KUAP_ALL)) kuap_lock_one(addr); @@ -112,33 +77,31 @@ static inline void kuap_unlock(unsigned long addr, bool ool) kuap_unlock_all_ool(); } -static inline void kuap_save_and_lock(struct pt_regs *regs) +static inline void __kuap_lock(void) { - unsigned long kuap = current->thread.kuap; +} - if (kuap_is_disabled()) - return; +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; regs->kuap = kuap; if (unlikely(kuap == KUAP_NONE)) return; current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, false); + kuap_lock_addr(kuap, false); } static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - if (kuap_is_disabled()) - return; - if (unlikely(kuap != KUAP_NONE)) { current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, false); + kuap_lock_addr(kuap, false); } if (likely(regs->kuap == KUAP_NONE)) @@ -149,29 +112,18 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) kuap_unlock(regs->kuap, false); } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; - if (kuap_is_disabled()) - return KUAP_NONE; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); return kuap; } -static inline void kuap_assert_locked(void) -{ - kuap_get_and_assert_locked(); -} - -static __always_inline void allow_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void __allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { - if (kuap_is_disabled()) - return; - BUILD_BUG_ON(!__builtin_constant_p(dir)); if (!(dir & KUAP_WRITE)) @@ -181,42 +133,33 @@ static __always_inline void allow_user_access(void __user *to, const void __user kuap_unlock_one((__force u32)to); } -static __always_inline void prevent_user_access(unsigned long dir) +static __always_inline void __prevent_user_access(unsigned long dir) { u32 kuap = current->thread.kuap; - if (kuap_is_disabled()) - return; - BUILD_BUG_ON(!__builtin_constant_p(dir)); if (!(dir & KUAP_WRITE)) return; current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, true); + kuap_lock_addr(kuap, true); } -static inline unsigned long prevent_user_access_return(void) +static inline unsigned long __prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; - if (kuap_is_disabled()) - return KUAP_NONE; - if (flags != KUAP_NONE) { current->thread.kuap = KUAP_NONE; - kuap_lock(flags, true); + kuap_lock_addr(flags, true); } return flags; } -static inline void restore_user_access(unsigned 
long flags) +static inline void __restore_user_access(unsigned long flags) { - if (kuap_is_disabled()) - return; - if (flags != KUAP_NONE) { current->thread.kuap = flags; kuap_unlock(flags, true); @@ -224,13 +167,10 @@ static inline void restore_user_access(unsigned long flags) } static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { unsigned long kuap = regs->kuap; - if (kuap_is_disabled()) - return false; - if (!is_write || kuap == KUAP_ALL) return false; if (kuap == KUAP_NONE) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index f5be185cbdf8..7be27862329f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -64,7 +64,82 @@ struct ppc_bat { #define SR_KP 0x20000000 /* User key */ #define SR_KS 0x40000000 /* Supervisor key */ -#ifndef __ASSEMBLY__ +#ifdef __ASSEMBLY__ + +#include <asm/asm-offsets.h> + +.macro uus_addi sr reg1 reg2 imm + .if NUM_USER_SEGMENTS > \sr + addi \reg1,\reg2,\imm + .endif +.endm + +.macro uus_mtsr sr reg1 + .if NUM_USER_SEGMENTS > \sr + mtsr \sr, \reg1 + .endif +.endm + +/* + * This isync() shouldn't be necessary as the kernel is not excepted to run + * any instruction in userspace soon after the update of segments and 'rfi' + * instruction is used to return to userspace, but hash based cores + * (at least G3) seem to exhibit a random behaviour when the 'isync' is not + * there. 603 cores don't have this behaviour so don't do the 'isync' as it + * saves several CPU cycles. + */ +.macro uus_isync +#ifdef CONFIG_PPC_BOOK3S_604 +BEGIN_MMU_FTR_SECTION + isync +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif +.endm + +.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4 + uus_addi 1, \tmp2, \tmp1, 0x111 + uus_addi 2, \tmp3, \tmp1, 0x222 + uus_addi 3, \tmp4, \tmp1, 0x333 + + uus_mtsr 0, \tmp1 + uus_mtsr 1, \tmp2 + uus_mtsr 2, \tmp3 + uus_mtsr 3, \tmp4 + + uus_addi 4, \tmp1, \tmp1, 0x444 + uus_addi 5, \tmp2, \tmp2, 0x444 + uus_addi 6, \tmp3, \tmp3, 0x444 + uus_addi 7, \tmp4, \tmp4, 0x444 + + uus_mtsr 4, \tmp1 + uus_mtsr 5, \tmp2 + uus_mtsr 6, \tmp3 + uus_mtsr 7, \tmp4 + + uus_addi 8, \tmp1, \tmp1, 0x444 + uus_addi 9, \tmp2, \tmp2, 0x444 + uus_addi 10, \tmp3, \tmp3, 0x444 + uus_addi 11, \tmp4, \tmp4, 0x444 + + uus_mtsr 8, \tmp1 + uus_mtsr 9, \tmp2 + uus_mtsr 10, \tmp3 + uus_mtsr 11, \tmp4 + + uus_addi 12, \tmp1, \tmp1, 0x444 + uus_addi 13, \tmp2, \tmp2, 0x444 + uus_addi 14, \tmp3, \tmp3, 0x444 + uus_addi 15, \tmp4, \tmp4, 0x444 + + uus_mtsr 12, \tmp1 + uus_mtsr 13, \tmp2 + uus_mtsr 14, \tmp3 + uus_mtsr 15, \tmp4 + + uus_isync +.endm + +#else /* * This macro defines the mapping from contexts to VSIDs (virtual @@ -100,9 +175,14 @@ struct hash_pte { typedef struct { unsigned long id; + unsigned long sr0; void __user *vdso; } mm_context_t; +#ifdef CONFIG_PPC_KUEP +#define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX +#endif + void update_bats(void); static inline void cleanup_cpu_mmu_context(void) { } diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 674fe0e890dc..a7a0572f3846 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -99,10 +99,6 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs. 
*/ -#ifdef CONFIG_PPC_MM_SLICES -#define HAVE_ARCH_UNMAPPED_AREA -#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* CONFIG_PPC_MM_SLICES */ /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 170339969b7c..69fcf63eec94 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -229,6 +229,11 @@ static inline u64 current_thread_iamr(void) #ifdef CONFIG_PPC_KUAP +static __always_inline bool kuap_is_disabled(void) +{ + return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP); +} + static inline void kuap_user_restore(struct pt_regs *regs) { bool restore_amr = false, restore_iamr = false; @@ -268,40 +273,38 @@ static inline void kuap_user_restore(struct pt_regs *regs) */ } -static inline void kuap_kernel_restore(struct pt_regs *regs, - unsigned long amr) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - if (unlikely(regs->amr != amr)) { - isync(); - mtspr(SPRN_AMR, regs->amr); - /* - * No isync required here because we are about to rfi - * back to previous context before any user accesses - * would be made, which is a CSI. - */ - } - } + if (likely(regs->amr == amr)) + return; + + isync(); + mtspr(SPRN_AMR, regs->amr); /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + * * No need to restore IAMR when returning to kernel space. */ } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - unsigned long amr = mfspr(SPRN_AMR); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ - WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); - return amr; - } - return 0; + unsigned long amr = mfspr(SPRN_AMR); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; } -static inline void kuap_assert_locked(void) +/* Do nothing, book3s/64 does that in ASM */ +static inline void __kuap_lock(void) +{ +} + +static inline void __kuap_save_and_lock(struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); } /* @@ -339,11 +342,8 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write) +static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - return false; /* * For radix this will be a storage protection fault (DSISR_PROTFAULT). 
* For hash this will be a key fault (DSISR_KEYFAULT) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3004f3323144..21f780942911 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -523,8 +523,14 @@ void slb_save_contents(struct slb_entry *slb_ptr); void slb_dump_contents(struct slb_entry *slb_ptr); extern void slb_vmalloc_update(void); -extern void slb_set_size(u16 size); void preload_new_slb_context(unsigned long start, unsigned long sp); + +#ifdef CONFIG_PPC_64S_HASH_MMU +void slb_set_size(u16 size); +#else +static inline void slb_set_size(u16 size) { } +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c02f42d1031e..ba5b1becf518 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -4,6 +4,12 @@ #include <asm/page.h> +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + #ifndef __ASSEMBLY__ /* * Page size definition @@ -62,6 +68,9 @@ extern struct patb_entry *partition_tb; #define PRTS_MASK 0x1f /* process table size field */ #define PRTB_MASK 0x0ffffffffffff000UL +/* Number of supported LPID bits */ +extern unsigned int mmu_lpid_bits; + /* Number of supported PID bits */ extern unsigned int mmu_pid_bits; @@ -76,10 +85,8 @@ extern unsigned long __ro_after_init radix_mem_block_size; #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) -/* - * Power9 currently only support 64K partition table size. - */ -#define PATB_SIZE_SHIFT 16 +#define PATB_SIZE_SHIFT (mmu_lpid_bits + 4) +#define PATB_ENTRIES (1ul << mmu_lpid_bits) typedef unsigned long mm_context_id_t; struct spinlock; @@ -98,7 +105,9 @@ typedef struct { * from EA and new context ids to build the new VAs. */ mm_context_id_t id; +#ifdef CONFIG_PPC_64S_HASH_MMU mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; +#endif }; /* Number of bits in the mm_cpumask */ @@ -110,7 +119,9 @@ typedef struct { /* Number of user space windows opened in process mm_context */ atomic_t vas_windows; +#ifdef CONFIG_PPC_64S_HASH_MMU struct hash_mm_context *hash_context; +#endif void __user *vdso; /* @@ -133,6 +144,7 @@ typedef struct { #endif } mm_context_t; +#ifdef CONFIG_PPC_64S_HASH_MMU static inline u16 mm_ctx_user_psize(mm_context_t *ctx) { return ctx->hash_context->user_psize; @@ -193,8 +205,15 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) extern int mmu_linear_psize; extern int mmu_virtual_psize; extern int mmu_vmalloc_psize; -extern int mmu_vmemmap_psize; extern int mmu_io_psize; +#else /* CONFIG_PPC_64S_HASH_MMU */ +#ifdef CONFIG_PPC_64K_PAGES +#define mmu_virtual_psize MMU_PAGE_64K +#else +#define mmu_virtual_psize MMU_PAGE_4K +#endif +#endif +extern int mmu_vmemmap_psize; /* MMU initialization */ void mmu_early_init_devtree(void); @@ -233,12 +252,13 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, * know which translations we will pick. Hence go with hash * restrictions. 
*/ - return hash__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + if (!early_radix_enabled()) + hash__setup_initial_memory_limit(first_memblock_base, + first_memblock_size); } #ifdef CONFIG_PPC_PSERIES -extern void radix_init_pseries(void); +void __init radix_init_pseries(void); #else static inline void radix_init_pseries(void) { } #endif @@ -255,6 +275,7 @@ static inline void radix_init_pseries(void) { } void cleanup_cpu_mmu_context(void); #endif +#ifdef CONFIG_PPC_64S_HASH_MMU static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; @@ -274,6 +295,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx, return get_vsid(context, ea, ssize); } +#endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 3b95769739c7..8b762f282190 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -112,8 +112,14 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start, struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); +void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); + +#ifdef CONFIG_PPC_64S_HASH_MMU /* Private function for use by PCI IO mapping code */ extern void __flush_hash_table_range(unsigned long start, unsigned long end); extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); +#else +static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { } +#endif #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 215973b4cb26..d2e80f178b6d 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -14,7 +14,6 @@ enum { TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ }; -#ifdef CONFIG_PPC_NATIVE static inline void tlbiel_all(void) { /* @@ -30,9 +29,6 @@ static inline void tlbiel_all(void) else hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } -#else -static inline void tlbiel_all(void) { BUG(); } -#endif static inline void tlbiel_all_lpid(bool radix) { diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index ad130e15a126..e8269434ecbe 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -25,6 +25,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT +#if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU) /* * This gets called at the end of handling a page fault, when * the kernel has put a new PTE into the page table for the process. @@ -35,6 +36,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, * waiting for the inevitable extra hash-table miss exception. 
*/ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); +#else +static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {} +#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h index 461b0f193864..860f8868f11e 100644 --- a/arch/powerpc/include/asm/btext.h +++ b/arch/powerpc/include/asm/btext.h @@ -23,12 +23,12 @@ extern void btext_unmap(void); extern void btext_drawchar(char c); extern void btext_drawstring(const char *str); -extern void btext_drawhex(unsigned long v); -extern void btext_drawtext(const char *c, unsigned int len); +void __init btext_drawhex(unsigned long v); +void __init btext_drawtext(const char *c, unsigned int len); -extern void btext_clearscreen(void); -extern void btext_flushscreen(void); -extern void btext_flushline(void); +void __init btext_clearscreen(void); +void __init btext_flushscreen(void); +void __init btext_flushline(void); #endif /* __KERNEL__ */ #endif /* __PPC_BTEXT_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 4ba834599c4d..e26080539c31 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -24,20 +24,20 @@ bool is_offset_in_branch_range(long offset); bool is_offset_in_cond_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); -int patch_instruction(u32 *addr, struct ppc_inst instr); -int raw_patch_instruction(u32 *addr, struct ppc_inst instr); +int patch_instruction(u32 *addr, ppc_inst_t instr); +int raw_patch_instruction(u32 *addr, ppc_inst_t instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) +static inline int patch_instruction_site(s32 *site, ppc_inst_t instr) { return patch_instruction((u32 *)patch_site_addr(site), instr); } @@ -58,18 +58,26 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(struct ppc_inst instr); -int instr_is_relative_link_branch(struct ppc_inst instr); +static inline unsigned int branch_opcode(ppc_inst_t instr) +{ + return ppc_inst_primary_opcode(instr) & 0x3F; +} + +static inline int instr_is_branch_iform(ppc_inst_t instr) +{ + return branch_opcode(instr) == 18; +} + +static inline int instr_is_branch_bform(ppc_inst_t instr) +{ + return branch_opcode(instr) == 16; +} + +int instr_is_relative_branch(ppc_inst_t instr); +int instr_is_relative_link_branch(ppc_inst_t instr); unsigned long branch_target(const u32 *instr); -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); -extern bool is_conditional_branch(struct ppc_inst instr); -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr); -#define patch_exception(exc, name) do { \ - extern unsigned int name; \ - __patch_exception((exc), (unsigned long)&name); \ -} while (0) -#endif +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src); +bool 
is_conditional_branch(ppc_inst_t instr); #define OP_RT_RA_MASK 0xffff0000UL #define LIS_R2 (PPC_RAW_LIS(_R2, 0)) diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index bda45788cfcc..9ee192a6c5d7 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -1133,8 +1133,8 @@ enum cpm_clk { CPM_CLK_DUMMY }; -extern int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); -extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); +int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); +int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); #define CPM_PIN_INPUT 0 #define CPM_PIN_OUTPUT 1 @@ -1143,7 +1143,7 @@ extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); #define CPM_PIN_GPIO 4 #define CPM_PIN_OPENDRAIN 8 -void cpm2_set_pin(int port, int pin, int flags); +void __init cpm2_set_pin(int port, int pin, int flags); #endif /* __CPM2__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 9844b3ded187..0cce5dc7fb1c 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -85,7 +85,7 @@ extern struct pnv_idle_states_t *pnv_idle_states; extern int nr_pnv_idle_states; unsigned long pnv_cpu_offline(unsigned int cpu); -int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); static inline void report_invalid_psscr_val(u64 psscr_val, int err) { switch (err) { diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index b167186aaee4..f26c430f3982 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask; #define threads_core_mask (*get_cpu_mask(0)) #endif -/* cpu_thread_mask_to_cores - Return a cpumask of one per cores - * hit by the argument - * - * @threads: a cpumask of online threads - * - * This function returns a cpumask which will have one online cpu's - * bit set for each core that has at least one thread set in the argument. 
- * - * This can typically be used for things like IPI for tlb invalidations - * since those need to be done only once per core/TLB - */ -static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) -{ - cpumask_t tmp, res; - int i, cpu; - - cpumask_clear(&res); - for (i = 0; i < NR_CPUS; i += threads_per_core) { - cpumask_shift_left(&tmp, &threads_core_mask, i); - if (cpumask_intersects(threads, &tmp)) { - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); - if (cpu < nr_cpu_ids) - cpumask_set_cpu(cpu, &res); - } - } - return res; -} - static inline int cpu_nr_cores(void) { return nr_cpu_ids >> threads_shift; } -static inline cpumask_t cpu_online_cores_map(void) -{ - return cpu_thread_mask_to_cores(cpu_online_mask); -} - #ifdef CONFIG_SMP int cpu_core_index_of_thread(int cpu); int cpu_first_thread_of_core(int core); diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b1a5bba2e0b9..bd513fd49be9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -460,7 +460,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, } -void eeh_cache_debugfs_init(void); +void __init eeh_cache_debugfs_init(void); #endif /* CONFIG_PPC64 */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 40cdcb2fb057..b1ef1e92c34a 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -149,6 +149,10 @@ exc_##label##_book3e: addi r11,r13,PACA_EXTLB; \ TLB_MISS_RESTORE(r11) +#ifndef __ASSEMBLY__ +extern unsigned int interrupt_base_book3e; +#endif + #define SET_IVOR(vector_number, vector_offset) \ LOAD_REG_ADDR(r3,interrupt_base_book3e);\ ori r3,r3,vector_offset@l; \ diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 8d61c8f3fec4..52189928ec08 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -137,10 +137,10 @@ struct fadump_ops { }; /* Helper functions */ -s32 fadump_setup_cpu_notes_buf(u32 num_cpus); +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus); void fadump_free_cpu_notes_buf(void); -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); -void fadump_update_elfcore_header(char *bufp); +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); +void __init fadump_update_elfcore_header(char *bufp); bool is_fadump_boot_mem_contiguous(void); bool is_fadump_reserved_mem_contiguous(void); diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 97a3bd9ffeb9..9b702d2b80fb 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -80,8 +80,6 @@ enum { FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, - FW_FEATURE_NATIVE_POSSIBLE = 0, - FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | @@ -92,9 +90,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | #endif -#ifdef CONFIG_PPC_NATIVE - FW_FEATURE_NATIVE_ALWAYS | -#endif 0, FW_FEATURE_ALWAYS = #ifdef CONFIG_PPC_PSERIES @@ -106,9 +101,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & #endif -#ifdef CONFIG_PPC_NATIVE - FW_FEATURE_NATIVE_ALWAYS & -#endif FW_FEATURE_POSSIBLE, #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/floppy.h 
b/arch/powerpc/include/asm/floppy.h index 7af9a68fd949..f8ce178b43b7 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -134,17 +134,19 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) int dir; doing_vdma = 0; - dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (bus_addr && (addr != prev_addr || size != prev_size || dir != prev_dir)) { /* different from last time -- unmap prev */ - pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir); + dma_unmap_single(&isa_bridge_pcidev->dev, bus_addr, prev_size, + prev_dir); bus_addr = 0; } if (!bus_addr) /* need to map it */ - bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir); + bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size, + dir); /* remember this one as prev */ prev_addr = addr; diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 242204e12993..d73153b0275d 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -98,13 +98,9 @@ start_text: . = sname##_len; #define USE_FIXED_SECTION(sname) \ - fs_label = start_##sname; \ - fs_start = sname##_start; \ use_ftsec sname; #define USE_TEXT_SECTION() \ - fs_label = start_text; \ - fs_start = text_start; \ .text #define CLOSE_FIXED_SECTION(sname) \ @@ -161,13 +157,15 @@ name: * - ABS_ADDR is used to find the absolute address of any symbol, from within * a fixed section. */ -#define DEFINE_FIXED_SYMBOL(label) \ - label##_absolute = (label - fs_label + fs_start) +// define label as being _in_ sname +#define DEFINE_FIXED_SYMBOL(label, sname) \ + label##_absolute = (label - start_ ## sname + sname ## _start) #define FIXED_SYMBOL_ABS_ADDR(label) \ (label##_absolute) -#define ABS_ADDR(label) (label - fs_label + fs_start) +// find label from _within_ sname +#define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index f18c543bc01d..962708fa1017 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void hugetlbpage_init_default(void); +void __init hugetlbpage_init_default(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index abebfbee5b1c..84d39fd42f71 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,7 +10,6 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include <asm/cpu_has_feature.h> -#include <asm/inst.h> #ifdef __KERNEL__ struct arch_hw_breakpoint { @@ -56,11 +55,11 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 
2 : 1; } -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info); -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea); #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 21cc571ea9c2..a58fb4aa6c81 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -61,7 +61,7 @@ static inline void __hard_irq_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(MSR_EE); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_EIE); @@ -73,7 +73,7 @@ static inline void __hard_irq_enable(void) static inline void __hard_irq_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(0); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_EID); @@ -85,7 +85,7 @@ static inline void __hard_irq_disable(void) static inline void __hard_EE_RI_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(0); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_NRI); @@ -97,7 +97,7 @@ static inline void __hard_EE_RI_disable(void) static inline void __hard_RI_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) return; if (IS_ENABLED(CONFIG_PPC_8xx)) @@ -224,6 +224,42 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +static inline void set_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to set PMI bit in the paca. + * This has to be called with irq's disabled (via hard_irq_disable()). + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened |= PACA_IRQ_PMI; +} + +static inline void clear_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to clear the pending PMI bit + * in the paca. + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened &= ~PACA_IRQ_PMI; +} + +static inline bool pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to check if there is a pending + * PMI bit in the paca. + */ + if (get_paca()->irq_happened & PACA_IRQ_PMI) + return true; + + return false; +} + #ifdef CONFIG_PPC_BOOK3S /* * To support disabling and enabling of irq with PMI, set of @@ -306,18 +342,57 @@ static inline bool lazy_irq_pending_nocheck(void) return __lazy_irq_pending(local_paca->irq_happened); } +bool power_pmu_wants_prompt_pmi(void); + +/* + * This is called by asynchronous interrupts to check whether to + * conditionally re-enable hard interrupts after having cleared + * the source of the interrupt. They are kept disabled if there + * is a different soft-masked interrupt pending that requires hard + * masking. + */ +static inline bool should_hard_irq_enable(void) +{ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(mfmsr() & MSR_EE); +#endif +#ifdef CONFIG_PERF_EVENTS + /* + * If the PMU is not running, there is not much reason to enable + * MSR[EE] in irq handlers because any interrupts would just be + * soft-masked. 
+ * + * TODO: Add test for 64e + */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi()) + return false; + + if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK) + return false; + + return true; +#else + return false; +#endif +} + /* - * This is called by asynchronous interrupts to conditionally - * re-enable hard interrupts after having cleared the source - * of the interrupt. They are kept disabled if there is a different - * soft-masked interrupt pending that requires hard masking. + * Do the hard enabling, only call this if should_hard_irq_enable is true. */ -static inline void may_hard_irq_enable(void) +static inline void do_hard_irq_enable(void) { - if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) { - get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - __hard_irq_enable(); - } +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); + WARN_ON(mfmsr() & MSR_EE); +#endif + /* + * This allows PMI interrupts (and watchdog soft-NMIs) through. + * There is no other reason to enable this way. + */ + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; + __hard_irq_enable(); } static inline bool arch_irq_disabled_regs(struct pt_regs *regs) @@ -398,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline bool may_hard_irq_enable(void) +static inline bool should_hard_irq_enable(void) { return false; } @@ -408,6 +483,10 @@ static inline void do_hard_irq_enable(void) BUILD_BUG(); } +static inline void clear_pmi_irq_pending(void) { } +static inline void set_pmi_irq_pending(void) { } +static inline bool pmi_irq_pending(void) { return false; } + static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) { } diff --git a/arch/powerpc/include/asm/i8259.h b/arch/powerpc/include/asm/i8259.h index d7f08ae49e12..75481d363cd8 100644 --- a/arch/powerpc/include/asm/i8259.h +++ b/arch/powerpc/include/asm/i8259.h @@ -7,7 +7,7 @@ extern void i8259_init(struct device_node *node, unsigned long intack_addr); extern unsigned int i8259_irq(void); -extern struct irq_domain *i8259_get_host(void); +struct irq_domain *__init i8259_get_host(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_I8259_H */ diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b11c0e2f9639..80b6d74146c6 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -3,20 +3,21 @@ #define _ASM_POWERPC_INST_H #include <asm/ppc-opcode.h> - -#ifdef CONFIG_PPC64 +#include <asm/reg.h> +#include <asm/disassemble.h> +#include <asm/uaccess.h> #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ long __gui_ret; \ u32 __user *__gui_ptr = (u32 __user *)ptr; \ - struct ppc_inst __gui_inst; \ + ppc_inst_t __gui_inst; \ unsigned int __prefix, __suffix; \ \ __chk_user_ptr(ptr); \ __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) { \ + if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else { \ @@ -27,13 +28,6 @@ } \ __gui_ret; \ }) -#else /* !CONFIG_PPC64 */ -#define ___get_user_instr(gu_op, dest, ptr) \ -({ \ - __chk_user_ptr(ptr); \ - gu_op((dest).val, (u32 __user *)(ptr)); \ -}) -#endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) @@ -43,44 +37,46 @@ * Instruction data type for 
POWER */ -struct ppc_inst { - u32 val; -#ifdef CONFIG_PPC64 - u32 suffix; -#endif -} __packed; - -static inline u32 ppc_inst_val(struct ppc_inst x) +#if defined(CONFIG_PPC64) || defined(__CHECKER__) +static inline u32 ppc_inst_val(ppc_inst_t x) { return x.val; } -static inline int ppc_inst_primary_opcode(struct ppc_inst x) +#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) + +#else +static inline u32 ppc_inst_val(ppc_inst_t x) { - return ppc_inst_val(x) >> 26; + return x; } +#define ppc_inst(x) (x) +#endif -#define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) +static inline int ppc_inst_primary_opcode(ppc_inst_t x) +{ + return ppc_inst_val(x) >> 26; +} #ifdef CONFIG_PPC64 -#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) +#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) }) -static inline u32 ppc_inst_suffix(struct ppc_inst x) +static inline u32 ppc_inst_suffix(ppc_inst_t x) { return x.suffix; } #else -#define ppc_inst_prefix(x, y) ppc_inst(x) +#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x)) -static inline u32 ppc_inst_suffix(struct ppc_inst x) +static inline u32 ppc_inst_suffix(ppc_inst_t x) { return 0; } #endif /* CONFIG_PPC64 */ -static inline struct ppc_inst ppc_inst_read(const u32 *ptr) +static inline ppc_inst_t ppc_inst_read(const u32 *ptr) { if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) return ppc_inst_prefix(*ptr, *(ptr + 1)); @@ -88,17 +84,17 @@ static inline struct ppc_inst ppc_inst_read(const u32 *ptr) return ppc_inst(*ptr); } -static inline bool ppc_inst_prefixed(struct ppc_inst x) +static inline bool ppc_inst_prefixed(ppc_inst_t x) { return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; } -static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x) { return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } -static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y) { if (ppc_inst_val(x) != ppc_inst_val(y)) return false; @@ -107,7 +103,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) return ppc_inst_suffix(x) == ppc_inst_suffix(y); } -static inline int ppc_inst_len(struct ppc_inst x) +static inline int ppc_inst_len(ppc_inst_t x) { return ppc_inst_prefixed(x) ? 
8 : 4; } @@ -118,14 +114,14 @@ static inline int ppc_inst_len(struct ppc_inst x) */ static inline u32 *ppc_inst_next(u32 *location, u32 *value) { - struct ppc_inst tmp; + ppc_inst_t tmp; tmp = ppc_inst_read(value); return (void *)location + ppc_inst_len(tmp); } -static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) +static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x) { if (IS_ENABLED(CONFIG_PPC32)) return ppc_inst_val(x); @@ -135,9 +131,17 @@ static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); } +static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) +{ + if (!ppc_inst_prefixed(x)) + *ptr = ppc_inst_val(x); + else + *(u64 *)ptr = ppc_inst_as_ulong(x); +} + #define PPC_INST_STR_LEN sizeof("00000000 00000000") -static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) { if (ppc_inst_prefixed(x)) sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); @@ -154,6 +158,27 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); +static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) +{ + unsigned int val, suffix; + + if (unlikely(!is_kernel_addr((unsigned long)src))) + return -ERANGE; + +/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */ +#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000 + val = suffix = 0; +#endif + __get_kernel_nofault(&val, src, u32, Efault); + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + __get_kernel_nofault(&suffix, src + 1, u32, Efault); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return 0; +Efault: + return -EFAULT; +} #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a1d238255f07..fc28f46d2f9d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void) local_paca->hsrr_valid = 0; } #else +static inline unsigned long search_kernel_restart_table(unsigned long addr) +{ + return 0; +} + static inline bool is_implicit_soft_masked(struct pt_regs *regs) { return false; @@ -139,39 +144,68 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) { - kuep_lock(); - account_cpu_user_entry(); - } else { + if (user_mode(regs)) + kuap_lock(); + else kuap_save_and_lock(regs); - } + + if (user_mode(regs)) + account_cpu_user_entry(); #endif #ifdef CONFIG_PPC64 - if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + bool trace_enable = false; + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) { + if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + trace_enable = true; + } else { + irq_soft_mask_set(IRQS_ALL_DISABLED); + } + + /* + * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. + * Asynchronous interrupts get here with HARD_DIS set (see below), so + * this enables MSR[EE] for synchronous interrupts. IRQs remain + * soft-masked. The interrupt handler may later call + * interrupt_cond_local_irq_enable() to achieve a regular process + * context. 
+ */ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!(regs->msr & MSR_EE)); + __hard_irq_enable(); + } else { + __hard_RI_enable(); + } + + /* Do this when RI=1 because it can cause SLB faults */ + if (trace_enable) trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; if (user_mode(regs)) { + kuap_lock(); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); account_cpu_user_entry(); account_stolen_time(); } else { + kuap_save_and_lock(regs); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ if (TRAP(regs) != INTERRUPT_PROGRAM) { CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - BUG_ON(is_implicit_soft_masked(regs)); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(is_implicit_soft_masked(regs)); } -#ifdef CONFIG_PPC_BOOK3S + /* Move this under a debugging check */ - if (arch_irq_disabled_regs(regs)) + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && + arch_irq_disabled_regs(regs)) BUG_ON(search_kernel_restart_table(regs->nip)); -#endif } if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); @@ -200,13 +234,20 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; +#endif + interrupt_enter_prepare(regs, state); #ifdef CONFIG_PPC_BOOK3S_64 + /* + * RI=1 is set by interrupt_enter_prepare, so this thread flags access + * has to come afterward (it can cause SLB faults). + */ if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_local_flags(_TLF_RUNLATCH)) __ppc64_runlatch_on(); #endif - - interrupt_enter_prepare(regs, state); irq_enter(); } @@ -276,6 +317,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte regs->softe = IRQS_ALL_DISABLED; } + __hard_RI_enable(); + /* Don't do any per-CPU operations until interrupt state is fixed */ if (nmi_disables_ftrace(regs)) { @@ -373,6 +416,8 @@ interrupt_handler long func(struct pt_regs *regs) \ { \ long ret; \ \ + __hard_RI_enable(); \ + \ ret = ____##func (regs); \ \ return ret; \ @@ -564,7 +609,7 @@ DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); /* slb.c */ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); -DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index c361212ac160..d7912b66c874 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -275,7 +275,7 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs); -extern void iommu_init_early_pSeries(void); +void __init iommu_init_early_pSeries(void); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h index 0524df31a7e6..b47ca7dc7199 100644 --- a/arch/powerpc/include/asm/ipic.h +++ b/arch/powerpc/include/asm/ipic.h @@ -65,7 +65,7 @@ enum ipic_mcp_irq { IPIC_MCP_MU = 7, }; -extern void ipic_set_default_priority(void); +void __init ipic_set_default_priority(void); extern u32 
ipic_get_mcp_status(void); extern void ipic_clear_mcp_status(u32 mask); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 2b3278534bc1..13f0409dd617 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -36,7 +36,7 @@ extern int distribute_irqs; struct pt_regs; -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x /* * Per-cpu stacks for handling critical, debug and machine check * level interrupts. diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index c6f250eca3fb..8ebdd23d987c 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -84,7 +84,7 @@ extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); extern void crash_kexec_secondary(struct pt_regs *regs); -extern int overlaps_crashkernel(unsigned long start, unsigned long size); +int __init overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1df763002726..fb2237809d63 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,6 +14,10 @@ #include <asm/nohash/32/kup-8xx.h> #endif +#ifdef CONFIG_BOOKE_OR_40x +#include <asm/nohash/kup-booke.h> +#endif + #ifdef CONFIG_PPC_BOOK3S_32 #include <asm/book3s/32/kup.h> #endif @@ -32,34 +36,29 @@ extern bool disable_kuap; #include <linux/pgtable.h> -#ifdef CONFIG_PPC_KUEP +void setup_kup(void); void setup_kuep(bool disabled); -#else -static inline void setup_kuep(bool disabled) { } -#endif /* CONFIG_PPC_KUEP */ - -#ifndef CONFIG_PPC_BOOK3S_32 -static inline void kuep_lock(void) { } -static inline void kuep_unlock(void) { } -#endif #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } +static __always_inline bool kuap_is_disabled(void) { return true; } + static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } -static inline void kuap_assert_locked(void) { } -static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void __kuap_assert_locked(void) { } +static inline void __kuap_lock(void) { } +static inline void __kuap_save_and_lock(struct pt_regs *regs) { } static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { return 0; } @@ -70,20 +69,99 @@ static inline unsigned long kuap_get_and_assert_locked(void) * platforms. 
*/ #ifndef CONFIG_PPC_BOOK3S_64 -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(unsigned long dir) { } -static inline unsigned long prevent_user_access_return(void) { return 0UL; } -static inline void restore_user_access(unsigned long flags) { } +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static inline void __prevent_user_access(unsigned long dir) { } +static inline unsigned long __prevent_user_access_return(void) { return 0UL; } +static inline void __restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ -static __always_inline void setup_kup(void) +static __always_inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + if (kuap_is_disabled()) + return false; + + return __bad_kuap_fault(regs, address, is_write); +} + +static __always_inline void kuap_assert_locked(void) { - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); + if (kuap_is_disabled()) + return; + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + __kuap_get_and_assert_locked(); +} + +static __always_inline void kuap_lock(void) +{ + if (kuap_is_disabled()) + return; + + __kuap_lock(); +} + +static __always_inline void kuap_save_and_lock(struct pt_regs *regs) +{ + if (kuap_is_disabled()) + return; + + __kuap_save_and_lock(regs); } +static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +{ + if (kuap_is_disabled()) + return; + + __kuap_kernel_restore(regs, amr); +} + +static __always_inline unsigned long kuap_get_and_assert_locked(void) +{ + if (kuap_is_disabled()) + return 0; + + return __kuap_get_and_assert_locked(); +} + +#ifndef CONFIG_PPC_BOOK3S_64 +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + if (kuap_is_disabled()) + return; + + __allow_user_access(to, from, size, dir); +} + +static __always_inline void prevent_user_access(unsigned long dir) +{ + if (kuap_is_disabled()) + return; + + __prevent_user_access(dir); +} + +static __always_inline unsigned long prevent_user_access_return(void) +{ + if (kuap_is_disabled()) + return 0; + + return __prevent_user_access_return(); +} + +static __always_inline void restore_user_access(unsigned long flags) +{ + if (kuap_is_disabled()) + return; + + __restore_user_access(flags); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index fbbf3cec92e9..d68d71987d5c 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -79,6 +79,7 @@ #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 #define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 +#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980 #define BOOK3S_INTERRUPT_DOORBELL 0xa00 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 #define BOOK3S_INTERRUPT_TRACE 0xd00 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3d31f2c59e43..91c9f937edcd 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } +/* 
Expiry time of vcpu DEC relative to host TB */ +static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset; +} + static inline bool is_kvmppc_resume_guest(int r) { return (r == RESUME_GUEST || r == RESUME_GUEST_NV); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fff391b9b97b..fe07558173ef 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -44,7 +44,6 @@ struct kvm_nested_guest { struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; cpumask_t need_tlb_flush; - cpumask_t cpu_in_guest; short prev_cpu[NR_CPUS]; u8 radix; /* is this nested guest radix */ }; @@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) return radix; } -int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); +unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr); + +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb); #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index c63105d2c9e7..68e499abdb24 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -16,7 +16,7 @@ static inline bool is_kvm_guest(void) return static_branch_unlikely(&kvm_guest); } -int check_kvm_guest(void); +int __init check_kvm_guest(void); #else static inline bool is_kvm_guest(void) { return false; } static inline int check_kvm_guest(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e4d23193eba7..17263276189e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -287,7 +287,6 @@ struct kvm_arch { u32 online_vcores; atomic_t hpte_mod_interest; cpumask_t need_tlb_flush; - cpumask_t cpu_in_guest; u8 radix; u8 fwnmi_enabled; u8 secure_guest; @@ -579,6 +578,10 @@ struct kvm_vcpu_arch { ulong cfar; ulong ppr; u32 pspb; + u8 load_ebb; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u8 load_tm; +#endif ulong fscr; ulong shadow_fscr; ulong ebbhr; @@ -741,7 +744,7 @@ struct kvm_vcpu_arch { struct hrtimer dec_timer; u64 dec_jiffies; - u64 dec_expires; + u64 dec_expires; /* Relative to guest timebase. 
*/ unsigned long pending_exceptions; u8 ceded; u8 prodded; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 671fbd1a765e..33db83b82fbd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -552,8 +552,7 @@ extern void kvm_hv_vm_activated(void); extern void kvm_hv_vm_deactivated(void); extern bool kvm_hv_mode_active(void); -extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, - struct kvm_nested_guest *nested); +extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu); #else static inline void __init kvm_cma_reserve(void) @@ -760,6 +759,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); void kvmppc_subcore_exit_guest(void); long kvmppc_realmode_hmi_handler(void); +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu); long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 9c3c9f04129f..e821037f74f0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -235,8 +235,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -extern void probe_machine(void); - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8abe8e42e045..5f41565a1e5d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) +#if defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -184,15 +184,18 @@ enum { MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | #endif #ifdef CONFIG_PPC_BOOK3S_64 + MMU_FTR_KERNEL_RO | +#ifdef CONFIG_PPC_64S_HASH_MMU MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | - MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA | + MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE | #endif /* CONFIG_PPC_RADIX_MMU */ +#endif #ifdef CONFIG_PPC_KUAP MMU_FTR_BOOK3S_KUAP | #endif /* CONFIG_PPC_KUAP */ @@ -224,6 +227,13 @@ enum { #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif +/* BOOK3S_64 options */ +#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX +#elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif + #ifndef MMU_FTRS_ALWAYS #define MMU_FTRS_ALWAYS 0 #endif @@ -329,7 +339,7 @@ static __always_inline bool radix_enabled(void) return mmu_has_feature(MMU_FTR_TYPE_RADIX); } -static inline bool early_radix_enabled(void) +static __always_inline bool early_radix_enabled(void) { return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); } diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 9ba6b585337f..fd277b15635c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -71,10 +71,11 @@ static inline void switch_mmu_context(struct mm_struct *prev, } extern int hash__alloc_context_id(void); -extern void 
hash__reserve_context_id(int id); +void __init hash__reserve_context_id(int id); extern void __destroy_context(int context_id); static inline void mmu_context_init(void) { } +#ifdef CONFIG_PPC_64S_HASH_MMU static inline int alloc_extended_context(struct mm_struct *mm, unsigned long ea) { @@ -100,6 +101,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) return true; return false; } +#endif #else extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 0abf2e7fd222..58353c5bd3fb 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -472,7 +472,7 @@ extern int mpic_cpu_get_priority(void); extern void mpic_cpu_set_priority(int prio); /* Request IPIs on primary mpic */ -extern void mpic_request_ipis(void); +void __init mpic_request_ipis(void); /* Send a message (IPI) to a given target (cpu number or MSG_*) */ void smp_mpic_message_pass(int target, int msg); diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 882a0bc7887a..c44d97751723 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -20,11 +20,12 @@ static __always_inline bool kuap_is_disabled(void) return static_branch_unlikely(&disable_kuap_key); } -static inline void kuap_save_and_lock(struct pt_regs *regs) +static inline void __kuap_lock(void) { - if (kuap_is_disabled()) - return; +} +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } @@ -33,21 +34,15 @@ static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, regs->kuap); } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long kuap; - if (kuap_is_disabled()) - return MD_APG_INIT; - kuap = mfspr(SPRN_MD_AP); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) @@ -56,36 +51,21 @@ static inline unsigned long kuap_get_and_assert_locked(void) return kuap; } -static inline void kuap_assert_locked(void) -{ - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) - kuap_get_and_assert_locked(); -} - -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void prevent_user_access(unsigned long dir) +static inline void __prevent_user_access(unsigned long dir) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline unsigned long prevent_user_access_return(void) +static inline unsigned long __prevent_user_access_return(void) { unsigned long flags; - if (kuap_is_disabled()) - return MD_APG_INIT; - flags = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); @@ -93,20 +73,14 @@ static inline unsigned long prevent_user_access_return(void) return flags; } -static inline void restore_user_access(unsigned long flags) +static inline void __restore_user_access(unsigned long flags) { - if (kuap_is_disabled()) - return; - 
mtspr(SPRN_MD_AP, flags); } static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - if (kuap_is_disabled()) - return false; - return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 43ceca128531..2d92a39d8f2e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -113,7 +113,6 @@ typedef struct { /* patch sites */ extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; -extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 997cec973406..0e93a4728c9e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -39,12 +39,10 @@ * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) * 2 => User => 11 (all accesses performed according as user iaw page definition) - * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT - * => 10 (all accesses performed according to swaped page definition) for KUEP + * 3 => User+Accessed => 10 (all accesses performed according to swaped page definition) for KUEP * 4-15 => Not Used */ -#define MI_APG_INIT 0xdc000000 -#define MI_APG_KUEP 0xde000000 +#define MI_APG_INIT 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9d2905a47410..a3313e853e5e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -313,6 +313,12 @@ extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long phys); extern void vmemmap_remove_mapping(unsigned long start, unsigned long page_size); +void __patch_exception(int exc, unsigned long addr); +#define patch_exception(exc, name) do { \ + extern unsigned int name; \ + __patch_exception((exc), (unsigned long)&name); \ +} while (0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h new file mode 100644 index 000000000000..49bb41ed0816 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_BOOKE_H_ +#define _ASM_POWERPC_KUP_BOOKE_H_ + +#include <asm/bug.h> + +#ifdef CONFIG_PPC_KUAP + +#ifdef __ASSEMBLY__ + +.macro kuap_check_amr gpr1, gpr2 +.endm + +#else + +#include <linux/jump_label.h> +#include <linux/sched.h> + +#include <asm/reg.h> + +extern struct static_key_false disable_kuap_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + +static inline void __kuap_lock(void) +{ + mtspr(SPRN_PID, 0); + isync(); +} + +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_PID); + mtspr(SPRN_PID, 0); + isync(); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ + if (kuap_is_disabled()) + return; + + 
mtspr(SPRN_PID, current->thread.pid); + + /* Context synchronisation is performed by rfi */ +} + +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + if (regs->kuap) + mtspr(SPRN_PID, current->thread.pid); + + /* Context synchronisation is performed by rfi */ +} + +static inline unsigned long __kuap_get_and_assert_locked(void) +{ + unsigned long kuap = mfspr(SPRN_PID); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap); + + return kuap; +} + +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + mtspr(SPRN_PID, current->thread.pid); + isync(); +} + +static inline void __prevent_user_access(unsigned long dir) +{ + mtspr(SPRN_PID, 0); + isync(); +} + +static inline unsigned long __prevent_user_access_return(void) +{ + unsigned long flags = mfspr(SPRN_PID); + + mtspr(SPRN_PID, 0); + isync(); + + return flags; +} + +static inline void __restore_user_access(unsigned long flags) +{ + if (flags) { + mtspr(SPRN_PID, current->thread.pid); + isync(); + } +} + +static inline bool +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return !regs->kuap; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* _ASM_POWERPC_KUP_BOOKE_H_ */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0b63ba7d5917..a2bc4b95e703 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1094,6 +1094,7 @@ enum { OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040, }; /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6ea9001de9a9..bfd3142cd0ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -314,7 +314,7 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_recoverable_ranges(unsigned long node, const char *uname, int depth, void *data); -extern void opal_configure_cores(void); +void __init opal_configure_cores(void); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index dc05a862e72a..295573a82c66 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -97,7 +97,9 @@ struct paca_struct { /* this becomes non-zero. */ u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct slb_shadow *slb_shadow_ptr; +#endif struct dtl_entry *dispatch_log; struct dtl_entry *dispatch_log_end; #endif @@ -110,6 +112,7 @@ struct paca_struct { /* used for most interrupts/exceptions */ u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); +#ifdef CONFIG_PPC_64S_HASH_MMU /* SLB related definitions */ u16 vmalloc_sllp; u8 slb_cache_ptr; @@ -120,6 +123,7 @@ struct paca_struct { u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. 
*/ u32 slb_kern_bitmap; u32 slb_cache[SLB_CACHE_ENTRIES]; +#endif #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_BOOK3E @@ -149,6 +153,7 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S +#ifdef CONFIG_PPC_64S_HASH_MMU #ifdef CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; @@ -157,6 +162,7 @@ struct paca_struct { u16 mm_ctx_sllp; #endif #endif +#endif /* * then miscellaneous read-write fields @@ -268,9 +274,11 @@ struct paca_struct { #endif /* CONFIG_PPC_PSERIES */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Capture SLB related old contents in MCE handler. */ struct slb_entry *mce_faulty_slbs; u16 slb_save_cache_ptr; +#endif #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_STACKPROTECTOR unsigned long canary; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index d1f53260725c..915d6ee4b40a 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -48,7 +48,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #ifdef CONFIG_PCI -extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops); #else /* CONFIG_PCI */ #define set_pci_dma_ops(d) #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f4c3428e816b..e2221d29fdf9 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -98,7 +98,7 @@ struct power_pmu { #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ -extern int register_power_pmu(struct power_pmu *); +int __init register_power_pmu(struct power_pmu *pmu); struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index baea657bc868..efad07081cc0 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -249,6 +249,7 @@ #define PPC_INST_COPY 0x7c20060c #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LSWI 0x7c0004aa @@ -393,6 +394,7 @@ (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) #define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBIEL_v205(rb, l) (0x7c000224 | ___PPC_RB(rb) | (l << 21)) #define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) @@ -566,6 +568,8 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO() (0x7c0006ac) +#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc)) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) @@ -575,6 +579,7 @@ #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) #define PPC_DIVDE(t, a, b) stringify_in_c(.long 
PPC_RAW_DIVDE(t, a, b)) #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) @@ -602,6 +607,7 @@ stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) #define PPC_TLBIEL(rb,rs,ric,prs,r) \ stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) +#define PPC_TLBIEL_v205(rb, l) stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l)) #define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) #define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 7be24048b8d1..f21e6bde17a1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -17,29 +17,40 @@ #define SZL (BITS_PER_LONG/8) /* + * This expands to a sequence of operations with reg incrementing from + * start to end inclusive, of this form: + * + * op reg, (offset + (width * reg))(base) + * + * Note that offset is not the offset of the first operation unless start + * is zero (or width is zero). + */ +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg, \offset + \width * .Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + +/* * Macros for storing registers into and loading registers from * exception frames. */ #ifdef __powerpc64__ -#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) -#define REST_GPR(n, base) ld n,GPR0+8*(n)(base) -#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base) +#define REST_NVGPRS(base) REST_GPRS(14, 31, base) #else -#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) -#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base) +#define REST_NVGPRS(base) REST_GPRS(13, 31, base) #endif -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 
e39bd0ff69f3..2c8686d9e964 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -157,8 +157,12 @@ struct thread_struct { #ifdef CONFIG_PPC_BOOK3S_32 unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; + unsigned long sr0; #endif #endif /* CONFIG_PPC32 */ +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + unsigned long pid; /* value written in PID reg. at interrupt exit */ +#endif /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS @@ -191,8 +195,10 @@ struct thread_struct { int used_vsr; /* set if process has used VSX */ #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE - unsigned long evr[32]; /* upper 32-bits of SPE regs */ - u64 acc; /* Accumulator */ + struct_group(spe, + unsigned long evr[32]; /* upper 32-bits of SPE regs */ + u64 acc; /* Accumulator */ + ); unsigned long spefscr; /* SPE & eFP status */ unsigned long spefscr_last; /* SPEFSCR value on last prctl call or trap return */ @@ -276,6 +282,12 @@ struct thread_struct { #define SPEFSCR_INIT #endif +#ifdef CONFIG_PPC_BOOK3S_32 +#define SR0_INIT .sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0, +#else +#define SR0_INIT +#endif + #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) #define INIT_THREAD { \ .ksp = INIT_SP, \ @@ -283,6 +295,7 @@ struct thread_struct { .kuap = ~0UL, /* KUAP_NONE */ \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ + SR0_INIT \ } #elif defined(CONFIG_PPC32) #define INIT_THREAD { \ @@ -290,6 +303,7 @@ struct thread_struct { .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ + SR0_INIT \ } #else #define INIT_THREAD { \ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 6e560f035614..42f89e2d8f04 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -291,7 +291,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) static inline bool cpu_has_msr_ri(void) { - return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x); + return !IS_ENABLED(CONFIG_BOOKE_OR_40x); } static inline bool regs_is_unrecoverable(struct pt_regs *regs) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e9d27265253b..2835f6363228 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -18,9 +18,9 @@ #include <asm/feature-fixups.h> /* Pickup Book E specific registers. 
*/ -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x #include <asm/reg_booke.h> -#endif /* CONFIG_BOOKE || CONFIG_40x */ +#endif #ifdef CONFIG_FSL_EMB_PERFMON #include <asm/reg_fsl_emb.h> @@ -1366,6 +1366,18 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ + +#if defined(CONFIG_PPC64) || defined(__CHECKER__) +typedef struct { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed ppc_inst_t; +#else +typedef u32 ppc_inst_t; +#endif + #define mfmsr() ({unsigned long rval; \ asm volatile("mfmsr %0" : "=r" (rval) : \ : "memory"); rval;}) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9dc97d2f9d27..82e5b055fa2a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -264,7 +264,7 @@ extern void rtas_get_rtc_time(struct rtc_time *rtc_time); extern int rtas_set_rtc_time(struct rtc_time *rtc_time); extern unsigned int rtas_busy_delay_time(int status); -extern unsigned int rtas_busy_delay(int status); +bool rtas_busy_delay(int status); extern int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 79cb7a25a5fb..38f79e42bf3c 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -25,16 +25,16 @@ extern char start_virt_trampolines[]; extern char end_virt_trampolines[]; #endif +/* + * This assumes the kernel is never compiled -mcmodel=small or + * the total .toc is always less than 64k. + */ static inline unsigned long kernel_toc_addr(void) { - /* Defined by the linker, see vmlinux.lds.S */ - extern unsigned long __toc_start; - - /* - * The TOC register (r2) points 32kB into the TOC, so that 64kB of - * the TOC can be addressed using a single machine instruction. 
- */ - return (unsigned long)(&__toc_start) + 0x8000UL; + unsigned long toc_ptr; + + asm volatile("mr %0, 2" : "=r" (toc_ptr)); + return toc_ptr; } static inline int overlaps_interrupt_vector_text(unsigned long start, diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 6c1a7d217d1a..d0d3dd531c7f 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,7 +9,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; -extern bool init_mem_is_free; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; @@ -32,7 +31,7 @@ void setup_panic(void); extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); -extern void pseries_little_endian_exceptions(void); +void __init pseries_little_endian_exceptions(void); #else static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} @@ -55,7 +54,7 @@ void setup_entry_flush(bool enable); void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC -void setup_barrier_nospec(void); +void __init setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { } #endif @@ -71,11 +70,11 @@ static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void #endif #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void); +void __init setup_spectre_v2(void); #else static inline void setup_spectre_v2(void) {} #endif -void do_btb_flush_fixups(void); +void __init do_btb_flush_fixups(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 4b30a0205c93..2ac6ab903023 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -456,7 +456,7 @@ extern void smu_poll(void); /* * Init routine, presence check.... */ -extern int smu_init(void); +int __init smu_init(void); extern int smu_present(void); struct platform_device; extern struct platform_device *smu_get_ofdev(void); diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 1df867c2e054..50950deedb87 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -145,7 +145,7 @@ union vsx_reg { * otherwise. */ extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - struct ppc_inst instr); + ppc_inst_t instr); /* * Emulate an instruction that can be executed just by updating @@ -162,7 +162,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); * 0 if it could not be emulated, or -1 for an instruction that * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). 
*/ -extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr); +int emulate_step(struct pt_regs *regs, ppc_inst_t instr); /* * Emulate a load or store instruction by reading/writing the diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9d1fbd8be1c7..1f43ef696033 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } +void kvmppc_save_user_regs(void); +void kvmppc_save_current_sprs(void); + extern int set_thread_tidr(struct task_struct *t); #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h index c993482237ed..38fdf8041d12 100644 --- a/arch/powerpc/include/asm/task_size_64.h +++ b/arch/powerpc/include/asm/task_size_64.h @@ -44,11 +44,7 @@ */ #define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE)) -#define TASK_SIZE_OF(tsk) \ - (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \ - TASK_SIZE_USER64) - -#define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64) #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4)) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8c2c3dd4ddba..924b2157882f 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -18,6 +18,8 @@ #include <asm/vdso/timebase.h> /* time.c */ +extern u64 decrementer_max; + extern unsigned long tb_ticks_per_jiffy; extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; @@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, extern void secondary_cpu_time_init(void); extern void __init time_init(void); -#ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) -{ - unsigned long x; +DECLARE_PER_CPU(u64, decrementers_next_tb); - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); - return x; +static inline u64 timer_get_next_tb(void) +{ + return __this_cpu_read(decrementers_next_tb); } -#endif -DECLARE_PER_CPU(u64, decrementers_next_tb); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now); +#endif /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 0ea9e70ed78b..b4aa0d88ce2c 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -23,14 +23,14 @@ extern void udbg_printf(const char *fmt, ...) 
__attribute__ ((format (printf, 1, 2))); extern void udbg_progress(char *s, unsigned short hex); -extern void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); -extern void udbg_uart_init_pio(unsigned long port, unsigned int stride); +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride); -extern void udbg_uart_setup(unsigned int speed, unsigned int clock); -extern unsigned int udbg_probe_uart_speed(unsigned int clock); +void __init udbg_uart_setup(unsigned int speed, unsigned int clock); +unsigned int __init udbg_probe_uart_speed(unsigned int clock); struct device_node; -extern void udbg_scc_init(int force_scc); +void __init udbg_scc_init(int force_scc); extern int udbg_adb_init(int force_btext); extern void udbg_adb_init_early(void); diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index fe683371336f..a7ae1860115a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,7 +11,6 @@ #include <linux/notifier.h> #include <asm/probes.h> -#include <asm/inst.h> typedef ppc_opcode_t uprobe_opcode_t; diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0ac9bfddf704..e2e704eca5f6 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -38,13 +38,13 @@ static inline int icp_native_init(void) { return -ENODEV; } /* PAPR ICP */ #ifdef CONFIG_PPC_ICP_HV -extern int icp_hv_init(void); +int __init icp_hv_init(void); #else static inline int icp_hv_init(void) { return -ENODEV; } #endif #ifdef CONFIG_PPC_POWERNV -extern int icp_opal_init(void); +int __init icp_opal_init(void); extern void icp_opal_flush_interrupt(void); #else static inline int icp_opal_init(void) { return -ENODEV; } diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h index 68bfb2361f03..f2d44b44f46c 100644 --- a/arch/powerpc/include/asm/xmon.h +++ b/arch/powerpc/include/asm/xmon.h @@ -12,7 +12,7 @@ #ifdef CONFIG_XMON extern void xmon_setup(void); -extern void xmon_register_spus(struct list_head *list); +void __init xmon_register_spus(struct list_head *list); struct pt_regs; extern int xmon(struct pt_regs *excp); extern irqreturn_t xmon_irq(int, void *); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b039877c743d..4d7829399570 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index bf96b954a4eb..3e37ece06739 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. 
*/ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - struct ppc_inst ppc_instr) + ppc_inst_t ppc_instr) { union { u64 ll; @@ -300,7 +300,7 @@ Efault_write: int fix_alignment(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; struct instruction_op op; int r, type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cc05522f50bf..7582f3e3a330 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_PPC32 -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x #include "head_booke.h" #endif #endif @@ -139,6 +139,7 @@ int main(void) OFFSET(THR11, thread_struct, r11); OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); + OFFSET(THSR0, thread_struct, sr0); #endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); @@ -218,10 +219,12 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_64S_HASH_MMU OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); +#endif OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 803c2a45b22a..9d9d56b574cc 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -161,7 +161,7 @@ void btext_map(void) boot_text_mapped = 1; } -static int btext_initialize(struct device_node *np) +static int __init btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; @@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } @@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height, } EXPORT_SYMBOL(btext_update_display); -void btext_clearscreen(void) +void __init btext_clearscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -308,7 +310,7 @@ void btext_clearscreen(void) rmci_maybe_off(); } -void btext_flushscreen(void) +void __init btext_flushscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -327,7 +329,7 @@ void btext_flushscreen(void) __asm__ __volatile__ ("sync" ::: "memory"); } -void btext_flushline(void) +void __init btext_flushline(void) { unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -542,7 +544,7 @@ void btext_drawstring(const char *c) btext_drawchar(*c++); } -void btext_drawtext(const char *c, unsigned int len) +void __init btext_drawtext(const char *c, unsigned int len) { if (!boot_text_mapped) return; @@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len) btext_drawchar(*c++); } -void btext_drawhex(unsigned long v) +void __init btext_drawhex(unsigned long v) { if (!boot_text_mapped) return; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index cf1be75b7833..00b0992be3e7 100644 
--- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr = __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); /* Attributes which should always be created -- the kobject/sysfs core - * does this automatically via kobj_type->default_attrs. This is the + * does this automatically via kobj_type->default_groups. This is the * minimum data required to uniquely identify a cache. */ static struct attribute *cache_index_default_attrs[] = { @@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_shared_cpu_list_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(cache_index_default); /* Attributes which should be created if the cache device node has the * right properties -- see cacheinfo_create_index_opt_attrs @@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = { static struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, - .default_attrs = cache_index_default_attrs, + .default_groups = cache_index_default_groups, }; static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3cca88ee96d7..3dc61e203f37 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void) static void init_PMU(void) { mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void) { mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } /* @@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -150,6 +151,7 @@ void __restore_cpu_power7(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -184,6 +187,7 @@ void __restore_cpu_power8(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -223,6 +228,7 @@ void __restore_cpu_power9(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | 
LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -264,6 +271,7 @@ void __restore_cpu_power10(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 5545c9cd17c1..f55c6fb34a3a 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) ppc_msgsync(); - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); kvmppc_clear_host_ipi(smp_processor_id()); __this_cpu_inc(irq_stat.doorbell_irqs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index ba527fb52993..7d1b2c4a4891 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void) mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_HFSCR, system_registers.hfscr); mtspr(SPRN_PCR, system_registers.pcr); } @@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f) } mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_LPES0; /* HV external interrupts */ @@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_ISL; @@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) { -#ifdef CONFIG_PPC_RADIX_MMU + if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU)) + return 0; + + cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO; cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; - cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; -#endif - return 0; } static int __init feat_enable_dscr(struct dt_cpu_feature *f) @@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) return 1; } -static void hfscr_pmu_enable(void) +static void __init hfscr_pmu_enable(void) { u64 hfscr = mfspr(SPRN_HFSCR); hfscr |= PPC_BIT(60); @@ -351,7 +359,7 @@ static void init_pmu_power8(void) } mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); mtspr(SPRN_MMCRS, 0); @@ -390,7 +398,7 @@ static void init_pmu_power9(void) mtspr(SPRN_MMCRC, 0); mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -426,7 +434,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 9bdaaf7fddc9..2f9dbf8ad2ee 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ 
b/arch/powerpc/kernel/eeh_cache.c @@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) } DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); -void eeh_cache_debugfs_init(void) +void __init eeh_cache_debugfs_init(void) { debugfs_create_file_unsafe("eeh_address_cache", 0400, arch_debugfs_dir, NULL, diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 350dab18e137..422f80b5b27b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } #endif /* CONFIG_STACKTRACE */ + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp) + edev->mode &= ~EEH_DEV_NO_HANDLER; + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); - result = PCI_ERS_RESULT_DISCONNECT; - } - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp) - edev->mode &= ~EEH_DEV_NO_HANDLER; + goto recover_failed; + } /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * the error. Override the result if necessary to have partially * hotplug for this case. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", - pe->freeze_count, eeh_max_freezes); - pr_info("EEH: Notify device drivers to shutdown\n"); - eeh_set_channel_state(pe, pci_channel_io_frozen); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(IO frozen)", pe, - eeh_report_error, &result); - if ((pe->type & EEH_PE_PHB) && - result != PCI_ERS_RESULT_NONE && - result != PCI_ERS_RESULT_NEED_RESET) - result = PCI_ERS_RESULT_NEED_RESET; - } + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); + pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(IO frozen)", pe, + eeh_report_error, &result); + if (result == PCI_ERS_RESULT_DISCONNECT) + goto recover_failed; + + /* + * Error logged on a PHB are always fences which need a full + * PHB reset to clear so force that to happen. + */ + if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE) + result = PCI_ERS_RESULT_NEED_RESET; /* Get the current PCI slot state. This can take a long time, * sometimes over 300 seconds for certain systems. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warn("EEH: Permanent failure\n"); - result = PCI_ERS_RESULT_DISCONNECT; - } + rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000); + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { + pr_warn("EEH: Permanent failure\n"); + goto recover_failed; } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. 
*/ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_info("EEH: Collect temporary log\n"); - eeh_slot_error_detail(pe, EEH_LOG_TEMP); - } + pr_info("EEH: Collect temporary log\n"); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they @@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { - pr_warn("%s: Unable to reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; + pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); + goto recover_failed; } } @@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe) if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enable I/O for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); @@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_report_mmio_enabled, &result); } } - - /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enabled DMA for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { /* @@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { - pr_warn("%s: Cannot reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; - } else { - result = PCI_ERS_RESULT_NONE; - eeh_set_channel_state(pe, pci_channel_io_normal); - eeh_set_irq_state(pe, true); - eeh_pe_report("slot_reset", pe, eeh_report_reset, - &result); + pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); + goto recover_failed; } + + result = PCI_ERS_RESULT_NONE; + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("slot_reset", pe, eeh_report_reset, + &result); } if ((result == PCI_ERS_RESULT_RECOVERED) || @@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } pr_info("EEH: Recovery successful.\n"); - } else { - /* - * About 90% of all real-life EEH failures in the field - * are due to poorly seated PCI cards. Only 10% or so are - * due to actual, failed cards. - */ - pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" - "Please try reseating or replacing it\n", - pe->phb->global_number, pe->addr); + goto out; + } - eeh_slot_error_detail(pe, EEH_LOG_PERM); +recover_failed: + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); - /* Notify all devices that they're about to go down. 
*/ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(permanent failure)", pe, - eeh_report_failure, NULL); + eeh_slot_error_detail(pe, EEH_LOG_PERM); - /* Mark the PE to be removed permanently */ - eeh_pe_state_mark(pe, EEH_PE_REMOVED); + /* Notify all devices that they're about to go down. */ + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); - /* - * Shut down the device drivers for good. We mark - * all removed devices correctly to avoid access - * the their PCI config any more. - */ - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + /* Mark the PE to be removed permanently */ + eeh_pe_state_mark(pe, EEH_PE_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - /* The passed PE should no longer be used */ - return; - } + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } out: diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 61fdd53cdd9a..7748c278d13c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -73,13 +73,39 @@ prepare_transfer_to_handler: _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) + .globl __kuep_lock +__kuep_lock: + lwz r9, THREAD+THSR0(r2) + update_user_segments_by_4 r9, r10, r11, r12 + blr + +__kuep_unlock: + lwz r9, THREAD+THSR0(r2) + rlwinm r9,r9,0,~SR_NX + update_user_segments_by_4 r9, r10, r11, r12 + blr + +.macro kuep_lock + bl __kuep_lock +.endm +.macro kuep_unlock + bl __kuep_unlock +.endm +#else +.macro kuep_lock +.endm +.macro kuep_unlock +.endm +#endif + .globl transfer_to_syscall transfer_to_syscall: stw r11, GPR1(r1) stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -90,10 +116,10 @@ transfer_to_syscall: stw r12,8(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) addi r2,r10,-THREAD SAVE_NVGPRS(r1) + kuep_lock /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD @@ -110,6 +136,7 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ + kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -139,7 +166,7 @@ syscall_exit_finish: mtxer r5 lwz r0,GPR0(r1) lwz r3,GPR3(r1) - REST_8GPRS(4,r1) + REST_GPRS(4, 11, r1) lwz r12,GPR12(r1) b 1b @@ -232,9 +259,9 @@ fast_exception_return: beq 3f /* if not, we've got problems */ #endif -2: REST_4GPRS(3, r11) +2: REST_GPRS(3, 6, r11) lwz r10,_CCR(r11) - REST_2GPRS(1, r11) + REST_GPRS(1, 2, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -273,6 +300,7 @@ interrupt_return: beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare cmpwi r3,0 + kuep_unlock bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: @@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) rfi @@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ stw r0,8(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ - REST_4GPRS(3, r1); \ - REST_2GPRS(7, r1); \ + REST_GPRS(3, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 70cff7b49e17..9581906b5ee9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -180,7 +180,7 @@ _GLOBAL(_switch) #endif ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION b 2f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) @@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync 2: -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 93b0f3ec8fb0..d4b8aff20815 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i])); patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, inst); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 711c66b76df1..67dc4e3179a0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,8 +198,7 @@ 
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) stdcx. r0,0,r1 /* to clear the reservation */ - REST_4GPRS(2, r1) - REST_4GPRS(6, r1) + REST_GPRS(2, 9, r1) ld r10,_CTR(r1) ld r11,_XER(r1) @@ -375,9 +374,7 @@ ret_from_mc_except: exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ @@ -1061,9 +1058,7 @@ bad_stack_book3e: std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ @@ -1077,8 +1072,7 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_10GPRS(14,r1) - SAVE_8GPRS(24,r1) + SAVE_GPRS(14, 31, r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf1f72131a1..55caeee37c08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -48,7 +48,7 @@ .balign IFETCH_ALIGN_BYTES; \ .global name; \ _ASM_NOKPROBE_SYMBOL(name); \ - DEFINE_FIXED_SYMBOL(name); \ + DEFINE_FIXED_SYMBOL(name, text); \ name: #define TRAMP_REAL_BEGIN(name) \ @@ -76,31 +76,18 @@ name: ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) -#define __LOAD_HANDLER(reg, label) \ +#define __LOAD_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l + ori reg,reg,(ABS_ADDR(label, section))@l /* * Branches from unrelocated code (e.g., interrupts) to labels outside * head-y require >64K offsets. */ -#define __LOAD_FAR_HANDLER(reg, label) \ +#define __LOAD_FAR_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; \ - addis reg,reg,(ABS_ADDR(label))@h - -/* - * Branch to label using its 0xC000 address. This results in instruction - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned - * on using mtmsr rather than rfid. - * - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than - * load KBASE for a slight optimisation. 
- */ -#define BRANCH_TO_C000(reg, label) \ - __LOAD_FAR_HANDLER(reg, label); \ - mtctr reg; \ - bctr + ori reg,reg,(ABS_ADDR(label, section))@l; \ + addis reg,reg,(ABS_ADDR(label, section))@h /* * Interrupt code generation macros @@ -111,9 +98,10 @@ name: #define IAREA .L_IAREA_\name\() /* PACA save area */ #define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ #define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ +#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */ +#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */ #define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ #define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ -#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ #define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ #define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ #define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ @@ -151,15 +139,18 @@ do_define_int n .ifndef IISIDE IISIDE=0 .endif + .ifndef ICFAR + ICFAR=1 + .endif + .ifndef ICFAR_IF_HVMODE + ICFAR_IF_HVMODE=0 + .endif .ifndef IDAR IDAR=0 .endif .ifndef IDSISR IDSISR=0 .endif - .ifndef ISET_RI - ISET_RI=1 - .endif .ifndef IBRANCH_TO_COMMON IBRANCH_TO_COMMON=1 .endif @@ -291,9 +282,21 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ + .if ICFAR BEGIN_FTR_SECTION mfspr r10,SPRN_CFAR END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .elseif ICFAR_IF_HVMODE +BEGIN_FTR_SECTION + BEGIN_FTR_SECTION_NESTED(69) + mfspr r10,SPRN_CFAR + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +FTR_SECTION_ELSE + BEGIN_FTR_SECTION_NESTED(69) + li r10,0 + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .endif .if \ool .if !\virt b tramp_real_\name @@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) BEGIN_FTR_SECTION std r9,IAREA+EX_PPR(r13) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + .if ICFAR || ICFAR_IF_HVMODE BEGIN_FTR_SECTION std r10,IAREA+EX_CFAR(r13) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .endif INTERRUPT_TO_KERNEL mfctr r10 std r10,IAREA+EX_CTR(r13) @@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * This switches to virtual mode and sets MSR[RI]. */ .macro __GEN_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .endif .balign IFETCH_ALIGN_BYTES -DEFINE_FIXED_SYMBOL(\name\()_common_virt) +DEFINE_FIXED_SYMBOL(\name\()_common_virt, text) \name\()_common_virt: .if IKVM_VIRT KVMTEST \name kvm_interrupt @@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) * want to run in real mode. 
*/ .macro __GEN_REALMODE_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) stb r10,PACASRR_VALID(r13) .endif - .if ISET_RI - li r10,MSR_RI - mtmsrd r10,1 /* Set MSR_RI */ - .endif - .if ISTACK .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 @@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .endif BEGIN_FTR_SECTION + .if ICFAR || ICFAR_IF_HVMODE ld r10,IAREA+EX_CFAR(r13) + .else + li r10,0 + .endif std r10,ORIG_GPR3(r1) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ - SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */ + SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ mflr r9 /* Get LR, later save to stack */ ld r2,PACATOC(r13) /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) + REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ ld r1,GPR1(r1) @@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000) #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) - __LOAD_HANDLER(r10, system_call_vectored_common) + __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines) mtctr r10 bctr TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) - __LOAD_HANDLER(r10, system_call_vectored_sigill) + __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines) mtctr r10 bctr #endif @@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset) IVEC=0x100 IAREA=PACA_EXNMI IVIRT=0 /* no virt entry point */ - /* - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is - * being used, so a nested NMI exception would corrupt it. - */ - ISET_RI=0 ISTACK=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) /* We are waking up from idle, so may clobber any volatile register */ cmpwi cr1,r5,2 bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ - BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) + __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines) + mtctr r12 + bctr #endif #ifdef CONFIG_PPC_PSERIES @@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_ENTRY system_reset /* - * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able - * to recover, but nested NMI will notice in_nmi and not recover - * because of the use of the NMI stack. in_nmi reentrancy is tested in - * system_reset_exception. + * Increment paca->in_nmi. When the interrupt entry wrapper later + * enable MSR_RI, then SLB or MCE will be able to recover, but a nested + * NMI will notice in_nmi and not recover because of the use of the NMI + * stack. in_nmi reentrancy is tested in system_reset_exception. */ lhz r10,PACA_IN_NMI(r13) addi r10,r10,1 sth r10,PACA_IN_NMI(r13) - li r10,MSR_RI - mtmsrd r10,1 mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) @@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early) IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ IREALMODE_COMMON=1 - /* - * MSR_RI is not enabled, because PACA_EXMC is being used, so a - * nested machine check corrupts it. machine_check_common enables - * MSR_RI. 
- */ - ISET_RI=0 ISTACK=0 IDAR=1 IDSISR=1 @@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check) IVEC=0x200 IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ - ISET_RI=0 IDAR=1 IDSISR=1 IKVM_REAL=1 @@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - li r10,MSR_RI - mtmsrd r10,1 - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ @@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - - /* Enable MSR_RI when finished with PACA_EXMC */ - li r10,MSR_RI - mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception_async b interrupt_return_srr @@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common) addi r3,r1,STACK_FRAME_OVERHEAD andis. r0,r4,DSISR_DABRMATCH@h bne- 1f +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr 1: bl do_break @@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr @@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not required because this is an asynchronous interrupt that in + * general won't have much bearing on the state of the CPU, with the possible + * exception of crash/debug IPIs, but those are generally moving to use SRESET + * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case + * it may be exiting the guest and need CFAR to be saved. 
*/ INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 @@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR_IF_HVMODE=1 +#endif INT_DEFINE_END(hardware_interrupt) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) * If PPC_WATCHDOG is configured, the soft masked handler will actually set * things back up to run soft_nmi_interrupt as a regular interrupt handler * on the emergency stack. + * + * CFAR is not required because this is asynchronous (see hardware_interrupt). + * A watchdog interrupt may like to have CFAR, but usually the interesting + * branch is long gone by that point (e.g., infinite loop). */ INT_DEFINE_BEGIN(decrementer) IVEC=0x900 @@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) @@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * If soft masked, the masked handler will note the pending interrupt for * replay, leaving MSR[EE] enabled in the interrupted context because the * doorbells are edge triggered. + * + * CFAR is not required, similarly to hardware_interrupt. */ INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 @@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(doorbell_super) EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call) IVEC=0xc00 IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 INT_DEFINE_END(system_call) .macro SYSTEM_CALL virt @@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common_real) + __LOAD_HANDLER(r10, system_call_common_real, real_vectors) mtctr r10 bctr .else #ifdef CONFIG_RELOCATABLE - __LOAD_HANDLER(r10, system_call_common) + __LOAD_HANDLER(r10, system_call_common, virt_vectors) mtctr r10 bctr #else @@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives * outside the head section. */ - __LOAD_FAR_HANDLER(r10, kvmppc_hcall) + __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines) mtctr r10 bctr #else @@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common) * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. * This is an asynchronous interrupt in response to a msgsnd doorbell. * Similar to the 0xa00 doorbell but for host rather than guest. + * + * CFAR is not required (similar to doorbell_interrupt), unless KVM HV + * is enabled, in which case it may be a guest exit. Most PowerNV kernels + * include KVM support so it would be nice if this could be dynamically + * patched out if KVM was not currently running any guests. */ INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 @@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_doorbell) EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) @@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common) * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. * This is an asynchronous interrupt in response to an "external exception". * Similar to 0x500 but for host only. + * + * Like h_doorbell, CFAR is only required for KVM HV because this can be + * a guest exit. 
*/ INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 @@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_virt_irq) EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) @@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not used by perf interrupts so not required. */ INT_DEFINE_BEGIN(performance_monitor) IVEC=0xf00 @@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(performance_monitor) EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) @@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 + ICFAR=0 INT_DEFINE_END(soft_nmi) /* @@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines) .align 7 .globl __end_interrupts __end_interrupts: -DEFINE_FIXED_SYMBOL(__end_interrupts) +DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines) CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b7ceb041743c..d03e488cfe9c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void) } /* Print firmware assisted dump configurations for debugging purpose. */ -static void fadump_show_config(void) +static void __init fadump_show_config(void) { int i; @@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void) * Calculate the total memory size required to be reserved for * firmware-assisted dump registration. */ -static unsigned long get_fadump_area_size(void) +static unsigned long __init get_fadump_area_size(void) { unsigned long size = 0; @@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void) * with the given memory range. * False, otherwise. */ -static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) { bool ret = false; int i; @@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) fw_dump.ops->fadump_trigger(fdh, str); } -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) { struct elf_prstatus prstatus; @@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) return buf; } -void fadump_update_elfcore_header(char *bufp) +void __init fadump_update_elfcore_header(char *bufp) { struct elf_phdr *phdr; @@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp) return; } -static void *fadump_alloc_buffer(unsigned long size) +static void *__init fadump_alloc_buffer(unsigned long size) { unsigned long count, i; struct page *page; @@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size) free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); } -s32 fadump_setup_cpu_notes_buf(u32 num_cpus) +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) { /* Allocate buffer to hold cpu crash notes. 
*/ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); @@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj, } /* Release the reserved memory and disable the FADump */ -static void unregister_fadump(void) +static void __init unregister_fadump(void) { fadump_cleanup(); fadump_release_memory(fw_dump.reserve_dump_area_start, @@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump); DEFINE_SHOW_ATTRIBUTE(fadump_region); -static void fadump_init_files(void) +static void __init fadump_init_files(void) { int rc = 0; @@ -1641,6 +1641,14 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); + /* + * In case of panic, fadump is triggered via ppc_panic_event() + * panic notifier. Setting crash_kexec_post_notifiers to 'true' + * lets panic() function take crash friendly path before panic + * notifiers are invoked. + */ + crash_kexec_post_notifiers = true; + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index ba4afe3b5a9c..f71f2bbd4de6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state) */ _GLOBAL(load_up_fpu) mfmsr r5 +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_FP|MSR_RI +#else ori r5,r5,MSR_FP +#endif #ifdef CONFIG_VSX BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 349c4a820231..c3286260a7d1 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) @@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andi. r12,r9,MSR_PR bne 777f bl prepare_transfer_to_handler +#ifdef CONFIG_PPC_KUEP + b 778f +777: + bl __kuep_lock +778: +#endif 777: #endif .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7d72ee5ab387..b6c6d1de5fd5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -650,7 +659,7 @@ start_here: b . /* prevent prefetch past rfi */ /* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. 
*/ initial_mmu: @@ -687,6 +696,12 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + li r0,62 /* TLB slot 62 */ + addis r4,r4,SZ_16M@h + addis r3,r3,SZ_16M@h + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 02d2928d1e01..b73a56466903 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -334,6 +334,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -444,6 +448,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -532,10 +540,7 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_44x_kuep -#endif + rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -575,6 +580,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG3 lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we @@ -672,6 +681,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Make up the required permissions */ @@ -747,10 +760,7 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_47x_kuep -#endif + rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f17ae2083733..5c5181e8d5f1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -126,7 +126,7 @@ __secondary_hold_acknowledge: . = 0x5c .globl __run_at_load __run_at_load: -DEFINE_FIXED_SYMBOL(__run_at_load) +DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) .long RUN_AT_LOAD_DEFAULT #endif @@ -156,7 +156,7 @@ __secondary_hold: /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ /* than 0x100, so only need to grab low order offset. */ - std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) + std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync li r26,0 @@ -164,7 +164,7 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. 
*/ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) cmpdi 0,r12,0 beq 100b @@ -649,15 +649,15 @@ __after_prom_start: 3: #endif /* # bytes of memory to copy */ - lis r5,(ABS_ADDR(copy_to_here))@ha - addi r5,r5,(ABS_ADDR(copy_to_here))@l + lis r5,(ABS_ADDR(copy_to_here, text))@ha + addi r5,r5,(ABS_ADDR(copy_to_here, text))@l bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ /* Jump to the copy of this code that we just made */ - addis r8,r3,(ABS_ADDR(4f))@ha - addi r12,r8,(ABS_ADDR(4f))@l + addis r8,r3,(ABS_ADDR(4f, text))@ha + addi r12,r8,(ABS_ADDR(4f, text))@l mtctr r12 bctr @@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here * Now copy the rest of the kernel up to _end, add * _end - copy_to_here to the copy limit and run again. */ - addis r8,r26,(ABS_ADDR(p_end))@ha - ld r8,(ABS_ADDR(p_end))@l(r8) + addis r8,r26,(ABS_ADDR(p_end, text))@ha + ld r8,(ABS_ADDR(p_end, text))@l(r8) add r5,r5,r8 5: bl copy_and_flush /* copy the rest */ @@ -904,7 +904,7 @@ _GLOBAL(relative_toc) blr .balign 8 -p_toc: .8byte __toc_start + 0x8000 - 0b +p_toc: .8byte .TOC. - 0b /* * This is where the main kernel code starts. diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 68e5c0a7e99d..fa84744d6b24 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ +#ifdef CONFIG_PPC_KUEP + lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */ +#else li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index ef8d1b1c234e..bb6d5d0fc4ac 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION stw r10, 8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0a9a0f301474..ac2b4dcf5fd3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + 4: /* Mask of required permission bits. Note that while we * do copy ESR:ST to _PAGE_RW position as trying to write @@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) andi. 
r10, r11, _PAGE_USER /* Test for _PAGE_USER */ slwi r10, r12, 1 or r10, r10, r12 + rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ iseleq r12, r12, r10 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 91a3be14808b..2669f80b3a49 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, struct arch_hw_breakpoint **info, int *hit, - struct ppc_inst instr) + ppc_inst_t instr) { int i; int stepped; @@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args) int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; bool ptrace_bp = false; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 42b967e3d85c..a74623025f3a 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * Return true if the event is valid wrt dawr configuration, * including extraneous exception. Otherwise return false. */ -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info) { @@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 1f835539fda4..4ad79eb638c6 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -82,7 +82,7 @@ void power4_idle(void) return; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile("DSSALL ; sync" ::: "memory"); + asm volatile(PPC_DSSALL " ; sync" ::: "memory"); power4_idle_nap(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 13cad9297d82..3c097356366b 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 563ebfcd16e2..7cd6ce3ec423 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; - kuep_lock(); + kuap_lock(); regs->orig_gpr3 = r3; @@ -406,7 +406,6 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); - kuep_unlock(); return ret; } diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ec950b08a8dc..92088f848266 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,21 +30,23 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld 
r12,_NIP(r1) + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_SRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_HSRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .endif #endif .endm @@ -162,10 +164,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFSCV_TO_USER b . /* prevent speculative execution */ @@ -183,9 +184,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtlr r4 mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) + REST_GPRS(2, 13, r1) + REST_GPR(1, r1) RFI_TO_USER .Lsyscall_vectored_\name\()_rst_end: @@ -374,10 +374,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFI_TO_USER b . /* prevent speculative execution */ @@ -388,8 +387,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtspr SPRN_XER,r4 ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) + REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -518,17 +516,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_XER(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) + REST_GPRS(7, 13, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) .ifc \srr,srr @@ -625,8 +620,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -638,7 +632,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ std r0,STACK_FRAME_OVERHEAD-16(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -703,7 +697,7 @@ interrupt_return_macro hsrr .globl __end_soft_masked __end_soft_masked: -DEFINE_FIXED_SYMBOL(__end_soft_masked) +DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c4f1d6b7d992..2cf31a97126c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs) irq = ppc_md.get_irq(); /* We can hard enable interrupts now to allow perf interrupts */ - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); /* And finally process it */ if (unlikely(!irq)) @@ -811,7 +812,7 @@ void __init init_IRQ(void) ppc_md.init_IRQ(); } -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void 
*mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index bdee7262c080..9f8d0fa7b718 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -48,7 +48,7 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE_OR_40x { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ #if defined(CONFIG_FSL_BOOKE) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ @@ -67,7 +67,7 @@ static struct hard_trap_info { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ +#else /* !CONFIG_BOOKE_OR_40x */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 86d77ff056a6..9a492fdec1df 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read(p->addr); + ppc_inst_t insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); + ppc_inst_t insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 225511d73bef..f2e03ed423d0 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd829f7f25a4..2503dd4713b9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, mc_error_class[evt->error_class] : "Unknown"; printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Display faulty slb contents for SLB errors. 
*/ if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c2f55fe7092d..71e8f2a92e36 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -77,15 +77,15 @@ static bool mce_in_guest(void) } /* flush SLBs and reload */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU void flush_and_reload_slb(void) { - /* Invalidate all SLBs */ - slb_flush_all_realmode(); - if (early_radix_enabled()) return; + /* Invalidate all SLBs */ + slb_flush_all_realmode(); + /* * This probably shouldn't happen, but it may be possible it's * called in early boot before SLB shadows are allocated. @@ -99,7 +99,7 @@ void flush_and_reload_slb(void) void flush_erat(void) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { flush_and_reload_slb(); return; @@ -114,7 +114,7 @@ void flush_erat(void) static int mce_flush(int what) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (what == MCE_FLUSH_SLB) { flush_and_reload_slb(); return 1; @@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - struct ppc_inst instr; + ppc_inst_t instr; unsigned long pfn, instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif handled = mce_flush(MCE_FLUSH_SLB); break; case MCE_ERROR_TYPE_ERAT: @@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif if (mce_flush(MCE_FLUSH_SLB)) handled = 1; break; diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index ed04a3ba66fe..40a583e9d3c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr, } static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end) +__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) { pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. 
*/ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, + return __vmalloc_node_range(size, 1, start, end, gfp, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -114,13 +115,13 @@ void *module_alloc(unsigned long size) /* First try within 32M limit from _etext to avoid branch trampolines */ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END); + ptr = __module_alloc(size, limit, MODULES_END, true); if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); return ptr; #else - return __module_alloc(size, VMALLOC_START, VMALLOC_END); + return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); #endif } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f417afc08d33..a491ad481d85 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) +{ + unsigned int jmp[4]; + + /* Find where the trampoline jumps to */ + if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp))) + return -EFAULT; + + /* verify that this is what we expect it to be */ + if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) || + (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) || + jmp[2] != PPC_RAW_MTCTR(_R12) || + jmp[3] != PPC_RAW_BCTR()) + return -EINVAL; + + addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16); + if (addr & 0x8000) + addr -= 0x10000; + + *target = addr; + + return 0; +} + int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { module->arch.tramp = do_plt_call(module->core_layout.base, @@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) if (!module->arch.tramp) return -ENOENT; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + module->arch.tramp_regs = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_regs_caller, + sechdrs, module); + if (!module->arch.tramp_regs) + return -ENOENT; +#endif + return 0; } #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 3c8d9bbb51cf..0d9f9cd41e13 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = { .write = nvram_pstore_write, }; -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { int rc = 0; @@ -562,7 +562,7 @@ static int nvram_pstore_init(void) return rc; } #else -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { return -1; } @@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) * Per the criteria passed via nvram_remove_partition(), should this * partition be removed? 
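For reference, the module_trampoline_target() helper added to module_32.c above recovers the branch target from the four-instruction PPC32 trampoline (lis r12 / addi r12 / mtctr r12 / bctr). A minimal userspace sketch of the same @ha/@l reconstruction, using a hypothetical pair of instruction words, might look like this:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch only. The stub built by do_plt_call() is:
 *	lis   r12, target@ha
 *	addi  r12, r12, target@l
 *	mtctr r12
 *	bctr
 * @ha rounds the high half up when the low half looks "negative", so after
 * recombining the two immediates that adjustment is undone by subtracting
 * 0x10000, exactly as module_trampoline_target() does.
 */
static uint32_t trampoline_target(uint32_t lis_insn, uint32_t addi_insn)
{
	uint32_t addr = (addi_insn & 0xffff) | ((lis_insn & 0xffff) << 16);

	if (addr & 0x8000)
		addr -= 0x10000;

	return addr;
}

int main(void)
{
	/*
	 * Hypothetical stub for target 0xc008fff0:
	 *   0x3d80c009  lis  r12, 0xc009
	 *   0x398cfff0  addi r12, r12, -16
	 */
	printf("target = 0x%08x\n", (unsigned int)trampoline_target(0x3d80c009, 0x398cfff0));
	return 0;
}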
1=remove, 0=keep */ -static int nvram_can_remove_partition(struct nvram_partition *part, +static int __init nvram_can_remove_partition(struct nvram_partition *part, const char *name, int sig, const char *exceptions[]) { if (part->header.signature != sig) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index ce1903064031..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - struct ppc_inst branch_op_callback, branch_emulate_step, temp; + ppc_inst_t branch_op_callback, branch_emulate_step, temp; unsigned long op_callback_addr, emulate_step_addr; kprobe_opcode_t *buff; long b_offset; @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. load instruction to be emulated into relevant register, and */ - if (IS_ENABLED(CONFIG_PPC64)) { - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); - } else { - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); - } + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline @@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { - struct ppc_inst instr; + ppc_inst_t instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 19ea3312403c..5c7f0b4b784b 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -10,8 +10,8 @@ #include <asm/asm-offsets.h> #ifdef CONFIG_PPC64 -#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) -#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) +#define REST_30GPRS(base) REST_GPRS(2, 31, base) #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop #else #define SAVE_30GPRS(base) stmw r2, GPR2(base) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 4208b4044d12..39da688a9455 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) } #endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_BOOK3S_64 - +#ifdef CONFIG_PPC_64S_HASH_MMU /* * 3 persistent SLBs are allocated here. The buffer will be zero * initially, hence will all be invaild until we actually write them. 
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) return s; } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ #ifdef CONFIG_PPC_PSERIES /** @@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU new_paca->slb_shadow_ptr = NULL; #endif @@ -307,7 +305,7 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_PSERIES paca->lppaca_ptr = new_lppaca(cpu, limit); #endif -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif #ifdef CONFIG_PPC_PSERIES @@ -328,7 +326,7 @@ void __init free_unused_pacas(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = new_ptrs_size; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (early_radix_enabled()) { /* Ugly fixup, see new_slb_shadow() */ memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), @@ -341,9 +339,9 @@ void __init free_unused_pacas(void) paca_ptrs_size + paca_struct_size, nr_cpu_ids); } +#ifdef CONFIG_PPC_64S_HASH_MMU void copy_mm_to_paca(struct mm_struct *mm) { -#ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; #ifdef CONFIG_PPC_MM_SLICES @@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; #endif -#else /* !CONFIG_PPC_BOOK3S */ - return; -#endif } +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6749905932f4..8bc9cf62cd93 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base); static const struct dma_map_ops *pci_dma_ops; -void set_pci_dma_ops(const struct dma_map_ops *dma_ops) +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index b49e1060a3bf..48537964fba1 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void pcibios_make_OF_bus_map(void); +void __init pcibios_make_OF_bus_map(void); static void fixup_cpc710_pci64(struct pci_dev* dev); static u8* pci_to_OF_bus_map; @@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void +void __init pcibios_make_OF_bus_map(void) { int i; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 406d7ee9e322..984813a4d5dc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs) { struct arch_hw_breakpoint null_brk = {0}; struct arch_hw_breakpoint *info; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; @@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvmppc_save_user_regs(void) +{ + unsigned long usermsr; + + if (!current->thread.regs) + return; + + usermsr = current->thread.regs->msr; + + if (usermsr & MSR_FP) + save_fpu(current); + + if (usermsr & MSR_VEC) + save_altivec(current); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (usermsr 
& MSR_TM) { + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif +} +EXPORT_SYMBOL_GPL(kvmppc_save_user_regs); + +void kvmppc_save_current_sprs(void) +{ + save_sprs(¤t->thread); +} +EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + static inline void restore_sprs(struct thread_struct *old_thread, struct thread_struct *new_thread) { @@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev, { struct thread_struct *new_thread, *old_thread; struct task_struct *last; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct ppc64_tlb_batch *batch; #endif @@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; @@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev, set_return_regs_changed(); /* _switch changes stack (and regs) */ -#ifdef CONFIG_PPC32 - kuap_assert_locked(); -#endif + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + kuap_assert_locked(); + last = _switch(old_thread, new_thread); /* @@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* * This applies to a process that was context switched while inside * arch_enter_lazy_mmu_mode(), to re-activate the batch that was @@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } +#endif /* * Math facilities are masked out of the child MSR in copy_thread. @@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + p->thread.pid = MMU_NO_CONTEXT; +#endif setup_ksp_vsid(p, sp); @@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) * the heap, we can put it above 1TB so it is backed by a 1TB * segment. Otherwise the heap will be in the bottom 1TB * which always uses 256MB segments and this may result in a - * performance penalty. We don't need to worry about radix. For - * radix, mmu_highuser_ssize remains unchanged from 256MB. + * performance penalty. 
*/ - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ad1230c4f3fe..3d30d40a0e9c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node) ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU static void __init init_mmu_slb_size(unsigned long node) { const __be32 *slb_size_ptr; @@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, */ #ifdef CONFIG_SPARSEMEM -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); @@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size) return true; } #else -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { return true; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 18b04b08b983..0ac5faacc909 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname) return call_prom("getproplen", 2, 1, node, ADDR(pname)); } -static void add_string(char **str, const char *q) +static void __init add_string(char **str, const char *q) { char *p = *str; @@ -682,7 +682,7 @@ static void add_string(char **str, const char *q) *str = p; } -static char *tohex(unsigned int x) +static char *__init tohex(unsigned int x) { static const char digits[] __initconst = "0123456789abcdef"; static char result[9] __prombss; @@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename, #define prom_islower(c) ('a' <= (c) && (c) <= 'z') #define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) -static unsigned long prom_strtoul(const char *cp, const char **endp) +static unsigned long __init prom_strtoul(const char *cp, const char **endp) { unsigned long result = 0, base = 10, value; @@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) return result; } -static unsigned long prom_memparse(const char *ptr, const char **retptr) +static unsigned long __init prom_memparse(const char *ptr, const char **retptr) { unsigned long ret = prom_strtoul(ptr, retptr); int shift = 0; @@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void) } #ifdef CONFIG_PPC_SVM -static int prom_rtas_hcall(uint64_t args) +static int __init prom_rtas_hcall(uint64_t args) { register uint64_t arg1 asm("r3") = H_RTAS; register uint64_t arg2 asm("r4") = args; @@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* @@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) /* * Perform the Enter Secure Mode ultracall. 
*/ -static int enter_secure_mode(unsigned long kbase, unsigned long fdt) +static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt) { register unsigned long r3 asm("r3") = UV_ESM; register unsigned long r4 asm("r4") = kbase; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ff80bbad22a5..733e6ef36758 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } EXPORT_SYMBOL(rtas_call); -/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status - * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. +/** + * rtas_busy_delay_time() - From an RTAS status value, calculate the + * suggested delay time in milliseconds. + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Any context. + * + * Return: + * * 100000 - If @status is 9905. + * * 10000 - If @status is 9904. + * * 1000 - If @status is 9903. + * * 100 - If @status is 9902. + * * 10 - If @status is 9901. + * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but + * some callers depend on this behavior, and the worst outcome + * is that they will delay for longer than necessary. + * * 0 - If @status is not a busy or extended delay value. */ unsigned int rtas_busy_delay_time(int status) { @@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status) } EXPORT_SYMBOL(rtas_busy_delay_time); -/* For an RTAS busy status code, perform the hinted delay. */ -unsigned int rtas_busy_delay(int status) +/** + * rtas_busy_delay() - helper for RTAS busy and extended delay statuses + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Process context. May sleep or schedule. + * + * Return: + * * true - @status is RTAS_BUSY or an extended delay hint. The + * caller may assume that the CPU has been yielded if necessary, + * and that an appropriate delay for @status has elapsed. + * Generally the caller should reattempt the RTAS call which + * yielded @status. + * + * * false - @status is not @RTAS_BUSY nor an extended delay hint. The + * caller is responsible for handling @status. + */ +bool rtas_busy_delay(int status) { unsigned int ms; + bool ret; - might_sleep(); - ms = rtas_busy_delay_time(status); - if (ms && need_resched()) - msleep(ms); + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + ret = true; + ms = rtas_busy_delay_time(status); + /* + * The extended delay hint can be as high as 100 seconds. + * Surely any function returning such a status is either + * buggy or isn't going to be significantly slowed by us + * polling at 1HZ. Clamp the sleep time to one second. + */ + ms = clamp(ms, 1U, 1000U); + /* + * The delay hint is an order-of-magnitude suggestion, not + * a minimum. It is fine, possibly even advantageous, for + * us to pause for less time than hinted. For small values, + * use usleep_range() to ensure we don't sleep much longer + * than actually needed. + * + * See Documentation/timers/timers-howto.rst for + * explanation of the threshold used here. In effect we use + * usleep_range() for 9900 and 9901, msleep() for + * 9902-9905. + */ + if (ms <= 20) + usleep_range(ms * 100, ms * 1000); + else + msleep(ms); + break; + case RTAS_BUSY: + ret = true; + /* + * We should call again immediately if there's no other + * work to do. 
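As a rough illustration of the RTAS status handling documented above, the sketch below maps a busy or extended-delay status to the hinted delay (values per the new kerneldoc table) and to the sleep primitive the rewritten rtas_busy_delay() would pick; the kernel calls (usleep_range, msleep, cond_resched) are only printed here, not performed:

#include <stdbool.h>
#include <stdio.h>

#define RTAS_BUSY			(-2)
#define RTAS_EXTENDED_DELAY_MIN		9900
#define RTAS_EXTENDED_DELAY_MAX		9905

/* Hinted delay in milliseconds, per the kerneldoc table above. */
static unsigned int busy_delay_time(int status)
{
	unsigned int ms = 0;

	if (status == RTAS_BUSY) {
		ms = 1;
	} else if (status >= RTAS_EXTENDED_DELAY_MIN &&
		   status <= RTAS_EXTENDED_DELAY_MAX) {
		int order = status - RTAS_EXTENDED_DELAY_MIN;

		for (ms = 1; order > 0; order--)	/* 9900 -> 1ms, ..., 9905 -> 100000ms */
			ms *= 10;
	}
	return ms;
}

/* What the rewritten rtas_busy_delay() would do; sleeps are only printed. */
static bool busy_delay(int status)
{
	unsigned int ms = busy_delay_time(status);

	if (!ms)
		return false;			/* not busy: caller handles the status itself */

	if (status == RTAS_BUSY) {
		printf("status %d: cond_resched(), retry immediately\n", status);
	} else {
		ms = ms > 1000 ? 1000 : ms;	/* clamp the 100s worst case to 1s */
		if (ms <= 20)
			printf("status %d: usleep_range(%u, %u)\n", status, ms * 100, ms * 1000);
		else
			printf("status %d: msleep(%u)\n", status, ms);
	}
	return true;				/* caller should retry the RTAS call */
}

int main(void)
{
	int samples[] = { RTAS_BUSY, 9900, 9902, 9905, 0 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		busy_delay(samples[i]);
	return 0;
}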
+ */ + cond_resched(); + break; + default: + ret = false; + /* + * Not a busy or extended delay status; the caller should + * handle @status itself. Ensure we warn on misuses in + * atomic context regardless. + */ + might_sleep(); + break; + } - return ms; + return ret; } EXPORT_SYMBOL(rtas_busy_delay); @@ -809,13 +886,13 @@ void rtas_os_term(char *str) /** * rtas_activate_firmware() - Activate a new version of firmware. * + * Context: This function may sleep. + * * Activate a new version of partition firmware. The OS must call this * after resuming from a partition hibernation or migration in order * to maintain the ability to perform live firmware updates. It's not * catastrophic for this method to be absent or to fail; just log the * condition in that case. - * - * Context: This function may sleep. */ void rtas_activate_firmware(void) { @@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) #endif /* CONFIG_PPC_PSERIES */ /** - * Find a specific pseries error log in an RTAS extended event log. + * get_pseries_errorlog() - Find a specific pseries error log in an RTAS + * extended event log. * @log: RTAS error/event log * @section_id: two character section identifier * - * Returns a pointer to the specified errorlog or NULL if not found. + * Return: A pointer to the specified errorlog or NULL if not found. */ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 32ee17753eb4..cf0f42909ddf 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ -static void start_event_scan(void) +static void __init start_event_scan(void) { printk(KERN_DEBUG "RTAS daemon started\n"); pr_debug("rtasd: will sleep for %d milliseconds\n", diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 15fb5ea1b9ea..e159d4093d98 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable) do_barrier_nospec_fixups(enable); } -void setup_barrier_nospec(void) +void __init setup_barrier_nospec(void) { bool enable; @@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2); #endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void) +void __init setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4f1322b65760..f8da937df918 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -582,7 +582,7 @@ static __init int add_pcspkr(void) device_initcall(add_pcspkr); #endif /* CONFIG_PCSPKR_PLATFORM */ -void probe_machine(void) +static __init void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 84058bbc8fe9..93f22da12abe 100644 --- 
a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void setup_tlb_core_data(void); static inline void setup_tlb_core_data(void) { } #endif -#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7ec5c47fce0e..a6e9d36d7c01 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); notrace void __init machine_init(u64 dt_ptr) { u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); - struct ppc_inst insn; + ppc_inst_t insn; /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); @@ -175,7 +175,7 @@ void __init emergency_stack_init(void) } #endif -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6052f5d5ded3..d87f7c1103ce 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -499,7 +499,7 @@ void smp_release_cpus(void) * routines and/or provided to userland */ -static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, +static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, u32 bsize, u32 sets) { info->size = size; @@ -880,14 +880,23 @@ void __init setup_per_cpu_areas(void) int rc = -EINVAL; /* - * Linear mapping is one of 4K, 1M and 16M. For 4K, no need - * to group units. For larger mappings, use 1M atom which - * should be large enough to contain a number of units. + * BookE and BookS radix are historical values and should be revisited. */ - if (mmu_linear_psize == MMU_PAGE_4K) + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + atom_size = SZ_1M; + } else if (radix_enabled()) { atom_size = PAGE_SIZE; - else - atom_size = 1 << 20; + } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = SZ_1M; + } if (pcpu_chosen_fc != PCPU_FC_PAGE) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3e053e2fd6b6..d84c434b2b78 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs, regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE - /* force the process to reload the spe registers from - current->thread when it next does spe instructions */ + /* + * Force the process to reload the spe registers from + * current->thread when it next does spe instructions. + * Since this is user ABI, we must enforce the sizing. 
+ */ + BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32)); regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); + unsafe_copy_from_user(¤t->thread.spe, &sr->mc_vregs, + sizeof(current->thread.spe), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + memset(¤t->thread.spe, 0, sizeof(current->thread.spe)); /* Always get SPEFSCR back */ unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c23ee842c4c3..b7fd6a72aa76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> +#include <asm/fadump.h> #ifdef DEBUG #include <asm/udbg.h> @@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif #ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + /* + * In case of fadump, register data for all CPUs is captured by f/w + * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before + * this rtas call to avoid tricky post processing of those CPUs' + * backtraces. + */ + if (should_fadump_crash()) + return; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + +#ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { /* @@ -896,7 +936,8 @@ out: return tg; } -static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, + int cpu, int cpu_group_start) { int first_thread = cpu_first_thread_sibling(cpu); int i; @@ -1635,12 +1676,14 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif -static void fixup_topology(void) +static void __init fixup_topology(void) { int i; diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index f73f4d72fea4..e0cbd63007f2 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 96bb20715aa9..9f1903c7f540 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/sysfs.c 
b/arch/powerpc/kernel/sysfs.c index 08d8072d6e7a..d45a415d5374 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev, static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); -static void sysfs_create_dscr_default(void) +static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { int cpu; @@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR(svm, 0444, show_svm, NULL); -static void create_svm_file(void) +static void __init create_svm_file(void) { device_create_file(cpu_subsys.dev_root, &dev_attr_svm); } #else -static void create_svm_file(void) +static void __init create_svm_file(void) { } #endif /* CONFIG_PPC_SVM */ @@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ #ifdef CONFIG_NUMA -static void register_nodes(void) +static void __init register_nodes(void) { int i; @@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid) EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); #else -static void register_nodes(void) +static void __init register_nodes(void) { return; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cae8f03a44fe..62361cc7281c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = { #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF u64 decrementer_max = DECREMENTER_DEFAULT_MAX; +EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = { EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); +EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); #define XSEC_PER_SEC (1024*1024) @@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} + static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : @@ -539,13 +551,44 @@ void arch_irq_work_raise(void) preempt_enable(); } +static void set_dec_or_work(u64 val) +{ + set_dec(val); + /* We may have raced with new irq work */ + if (unlikely(test_irq_work_pending())) + set_dec(1); +} + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() +static void set_dec_or_work(u64 val) +{ + set_dec(val); +} #endif /* CONFIG_IRQ_WORK */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + + WARN_ON_ONCE(!arch_irqs_disabled()); + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (now >= *next_tb) { + local_paca->irq_happened |= PACA_IRQ_DEC; + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec_or_work(now); + } +} +EXPORT_SYMBOL_GPL(timer_rearm_host_dec); +#endif + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. 
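A hedged, userspace-flavoured sketch of the new set_dec_or_work() pattern in time.c, with set_dec() and the PACA irq_work flag replaced by stand-ins, shows why the flag is re-checked after the decrementer has been programmed:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int irq_work_pending;		/* stand-in for the per-CPU/PACA flag */

static void set_dec(unsigned long long ticks)
{
	printf("DEC <- %llu\n", ticks);		/* stand-in for writing SPRN_DEC */
}

/*
 * Program the decrementer, then re-check for irq_work raised in the
 * meantime; if some arrived, force the decrementer to expire right away so
 * the work is not deferred until the next (possibly distant) tick.
 */
static void set_dec_or_work(unsigned long long ticks)
{
	set_dec(ticks);
	if (atomic_load(&irq_work_pending))
		set_dec(1);
}

int main(void)
{
	set_dec_or_work(1000000);		/* nothing pending: one write */
	atomic_store(&irq_work_pending, 1);	/* work raised concurrently */
	set_dec_or_work(1000000);		/* reprogrammed to expire almost immediately */
	return 0;
}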
@@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) return; } - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions. When the - * PPC_WATCHDOG (decrementer based) is configured, keep this at most - * 31 bits, which is about 4 seconds on most systems, which gives - * the watchdog a chance of catching timer interrupt hard lockups. - */ - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) - set_dec(0x7fffffff); - else - set_dec(decrementer_max); - - /* Conditionally hard-enable interrupts now that the DEC has been - * bumped to its maximum value - */ - may_hard_irq_enable(); + /* Conditionally hard-enable interrupts. */ + if (should_hard_irq_enable()) { + /* + * Ensure a positive value is written to the decrementer, or + * else some CPUs will continue to take decrementer exceptions. + * When the PPC_WATCHDOG (decrementer based) is configured, + * keep this at most 31 bits, which is about 4 seconds on most + * systems, which gives the watchdog a chance of catching timer + * interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + do_hard_irq_enable(); + } #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -606,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } else { now = *next_tb - now; if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } @@ -730,7 +771,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x unsigned int tcr; /* Clear any pending timer interrupts */ @@ -843,11 +884,7 @@ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { __this_cpu_write(decrementers_next_tb, get_tb() + evt); - set_dec(evt); - - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(evt); return 0; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2b91f233b05d..3beecc32940b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim) /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ - SAVE_GPR(2, r7) /* user r2 */ - SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_GPR(8, r7) /* user r8 */ - SAVE_GPR(9, r7) /* user r9 */ - SAVE_GPR(10, r7) /* user r10 */ + SAVE_GPRS(2, 6, r7) /* user r2-r6 */ + SAVE_GPRS(8, 10, r7) /* user r8-r10 */ ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ @@ -445,12 +442,8 @@ restore_gprs: ld r6, THREAD_TM_PPR(r3) REST_GPR(0, r7) /* GPR0 */ - REST_2GPRS(2, r7) /* GPR2-3 */ - REST_GPR(4, r7) /* GPR4 */ - REST_4GPRS(8, r7) /* GPR8-11 */ - REST_2GPRS(12, r7) /* GPR12-13 */ - - REST_NVGPRS(r7) /* GPR14-31 */ + REST_GPRS(2, 4, r7) /* GPR2-4 */ + REST_GPRS(8, 31, r7) /* GPR8-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d89c5df4f206..80b6285769f2 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -41,10 +41,10 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long 
ftrace_tramps[NUM_FTRACE_TRAMPS]; -static struct ppc_inst +static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); @@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) } static int -ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) { - struct ppc_inst replaced; + ppc_inst_t replaced; /* * Note: @@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ return create_branch(&op, (u32 *)ip, addr, 0) == 0; } -static int is_bl_op(struct ppc_inst op) +static int is_bl_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(struct ppc_inst op) +static int is_b_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } -static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) { int offset; @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - struct ppc_inst op, pop; + ppc_inst_t op, pop; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -221,10 +221,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; - unsigned int jmp[4]; + ppc_inst_t op; unsigned long ip = rec->ip; - unsigned long tramp; + unsigned long tramp, ptr; if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; @@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod, /* lets find where the pointer goes */ tramp = find_bl_target(ip, op); - /* - * On PPC32 the trampoline looks like: - * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha - * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l - * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 - * 0x4e, 0x80, 0x04, 0x20 bctr - */ - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - /* Find where the trampoline jumps to */ - if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { - pr_err("Failed to read %lx\n", tramp); + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); return -EFAULT; } - pr_devel(" %08x %08x ", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d800000) || - ((jmp[1] & 0xffff0000) != 0x398c0000) || - (jmp[2] != 0x7d8903a6) || - (jmp[3] != 0x4e800420)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } - - tramp = (jmp[1] & 0xffff) | - ((jmp[0] & 0xffff) << 16); - if (tramp & 0x8000) - tramp -= 0x10000; - - pr_devel(" %lx ", tramp); - - if (tramp != addr) { + if (ptr != addr) { pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; @@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - struct ppc_inst instr; + ppc_inst_t instr; /* * We have the compiler generated long_branch tramps at the end @@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp) static int setup_mcount_compiler_tramp(unsigned long tramp) { int i; - struct ppc_inst op; + ppc_inst_t op; 
unsigned long ptr; - struct ppc_inst instr; + ppc_inst_t instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ @@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - struct ppc_inst op; + ppc_inst_t op; /* Read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* * We expect to see: @@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) } #else static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) @@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op[2]; - struct ppc_inst instr; + ppc_inst_t op[2]; + ppc_inst_t instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -588,8 +559,10 @@ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; - struct ppc_inst op; + ppc_inst_t op; u32 *ip = (u32 *)rec->ip; + struct module *mod = rec->arch.mod; + unsigned long tramp; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, ip)) @@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else + if (!mod->arch.tramp) { +#endif pr_err("No ftrace trampoline\n"); return -EINVAL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else +#endif + tramp = mod->arch.tramp; /* create the branch to the trampoline */ - err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -713,7 +696,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, 
tramp; struct module *mod = rec->arch.mod; @@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - struct ppc_inst old, new; + ppc_inst_t old, new; int ret; old = ppc_inst_read((u32 *)&ftrace_call); @@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index e023ae59c429..0a02c0cb12d9 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -9,55 +9,135 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> #include <asm/export.h> +#include <asm/ptrace.h> _GLOBAL(mcount) _GLOBAL(_mcount) /* * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 + * link register. But we have r12 to play with. We use r12 * to push the return address back to the caller of mcount * into the ctr register, restore the link register and * then jump back using the ctr register. 
*/ - mflr r0 - mtctr r0 - lwz r0, 4(r1) + mflr r12 + mtctr r12 mtlr r0 bctr +EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_caller) MCOUNT_SAVE_FRAME /* r3 ends up with link register */ subi r3, r3, MCOUNT_INSN_SIZE + lis r5,function_trace_op@ha + lwz r5,function_trace_op@l(r5) + li r6, 0 .globl ftrace_call ftrace_call: bl ftrace_stub nop + MCOUNT_RESTORE_FRAME +ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: b ftrace_graph_stub _GLOBAL(ftrace_graph_stub) #endif - MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr -EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) blr +_GLOBAL(ftrace_regs_caller) + /* Save the original return address in A's stack frame */ + stw r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stwu r1,-INT_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + stw r0, GPR0(r1) + stmw r2, GPR2(r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, INT_FRAME_SIZE + stw r8, GPR1(r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip */ + stw r7, _NIP(r1) + /* Save the read LR in pt_regs->link */ + stw r0, _LINK(r1) + + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + stw r8, _MSR(r1) + stw r9, _CTR(r1) + stw r10, _XER(r1) + stw r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1, STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + lwz r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + lmw r2, GPR2(r1) + + /* Restore possibly modified LR */ + lwz r0, _LINK(r1) + mtlr r0 + + /* Pop our stack frame */ + addi r1, r1, INT_FRAME_SIZE + + b ftrace_caller_common + #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + addi r5, r1, 48 - /* load r4 with local address */ - lwz r4, 44(r1) + mfctr r4 /* ftrace_caller has moved local addr here */ + stw r4, 44(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ subi r4, r4, MCOUNT_INSN_SIZE - /* Grab the LR out of the caller stack frame */ - lwz r3,52(r1) - bl prepare_ftrace_return nop @@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller) * Change the LR in the callers stack frame to this. */ stw r3,52(r1) + mtlr r3 + lwz r0,44(r1) + mtctr r0 + + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + + addi r1, r1, 48 - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ bctr _GLOBAL(return_to_handler) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba..d636fc755f60 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_10GPRS(2, r1) + SAVE_GPRS(2, 11, r1) /* Ok to continue? 
*/ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace - SAVE_10GPRS(12, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(12, 31, r1) /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -108,10 +107,8 @@ ftrace_regs_call: #endif /* Restore gprs */ - REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_GPR(0, r1) + REST_GPRS(2, 31, r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller) stdu r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(3, r1) + SAVE_GPRS(3, 10, r1) lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -194,7 +191,7 @@ ftrace_call: mtctr r3 /* Restore gprs */ - REST_8GPRS(3,r1) + REST_GPRS(3, 10, r1) /* Restore callee's TOC */ ld r2, 24(r1) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 8513aa49614e..d3942de254c6 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -84,7 +84,7 @@ static int udbg_uart_getc(void) return udbg_uart_in(UART_RBR); } -static void udbg_use_uart(void) +static void __init udbg_use_uart(void) { udbg_putc = udbg_uart_putc; udbg_flush = udbg_uart_flush; @@ -92,7 +92,7 @@ static void udbg_use_uart(void) udbg_getc_poll = udbg_uart_getc_poll; } -void udbg_uart_setup(unsigned int speed, unsigned int clock) +void __init udbg_uart_setup(unsigned int speed, unsigned int clock) { unsigned int dll, base_bauds; @@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock) udbg_uart_out(UART_FCR, 0x7); } -unsigned int udbg_probe_uart_speed(unsigned int clock) +unsigned int __init udbg_probe_uart_speed(unsigned int clock) { unsigned int dll, dlm, divisor, prescaler, speed; u8 old_lcr; @@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data) outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride)); } -void udbg_uart_init_pio(unsigned long port, unsigned int stride) +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride) { if (!port) return; @@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data) } -void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) { if (!addr) return; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index ae632569446f..fd9432875ebc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ba03eedfdcd8..5cc24d8cce94 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state) */ _GLOBAL(load_up_altivec) mfmsr r5 /* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_RI +#endif oris r5,r5,MSR_VEC@h MTMSRD(r5) /* enable use of AltiVec now */ isync @@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx) andis. 
r5,r12,MSR_VEC@h beql+ load_up_altivec /* skip if already loaded */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + li r5,MSR_RI + mtmsrd r5,1 +#endif + ld r4,PACACURRENT(r13) addi r4,r4,THREAD /* Get THREAD */ li r6,1 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 18e42c74abdd..2bcca818136a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -322,10 +322,6 @@ SECTIONS #ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(SDATA_MAIN) *(.sdata2) @@ -336,24 +332,18 @@ SECTIONS #else .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(.toc1) *(.branch_lt) } - . = ALIGN(256); - .got : AT(ADDR(.got) - LOAD_OFFSET) { - __toc_start = .; + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { + *(.got) #ifndef CONFIG_RELOCATABLE __prom_init_toc_start = .; - arch/powerpc/kernel/prom_init.o*(.toc .got) + arch/powerpc/kernel/prom_init.o*(.toc) __prom_init_toc_end = .; #endif - *(.got) *(.toc) } #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3fa6d240bade..bfc27496fe7e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ static unsigned long __wd_smp_lock; +static unsigned long __wd_reporting; +static unsigned long __wd_nmi_output; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +/* + * Try to take the exclusive watchdog action / NMI IPI / printing lock. + * wd_smp_lock must be held. If this fails, we should return and wait + * for the watchdog to kick in again (or another CPU to trigger it). + * + * Importantly, if hardlockup_panic is set, wd_try_report failure should + * not delay the panic, because whichever other CPU is reporting will + * call panic. + */ +static bool wd_try_report(void) +{ + if (__wd_reporting) + return false; + __wd_reporting = 1; + return true; +} + +/* End printing after successful wd_try_report. wd_smp_lock not required. */ +static void wd_end_reporting(void) +{ + smp_mb(); /* End printing "critical section" */ + WARN_ON_ONCE(__wd_reporting == 0); + WRITE_ONCE(__wd_reporting, 0); +} + static inline void wd_smp_lock(unsigned long *flags) { /* @@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs) else dump_stack(); + /* + * __wd_nmi_output must be set after we printk from NMI context. + * + * printk from NMI context defers printing to the console to irq_work. + * If that NMI was taken in some code that is hard-locked, then irqs + * are disabled so irq_work will never fire. That can result in the + * hard lockup messages being delayed (indefinitely, until something + * else kicks the console drivers). + * + * Setting __wd_nmi_output will cause another CPU to notice and kick + * the console drivers for us. + * + * xchg is not needed here (it could be a smp_mb and store), but xchg + * gives the memory ordering and atomicity required. 
+ */ + xchg(&__wd_nmi_output, 1); + /* Do not panic from here because that can recurse into NMI IPI layer */ } -static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) +static bool set_cpu_stuck(int cpu) { - cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); - cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); + return true; } -} -static void set_cpu_stuck(int cpu, u64 tb) -{ - set_cpumask_stuck(cpumask_of(cpu), tb); + return false; } -static void watchdog_smp_panic(int cpu, u64 tb) +static void watchdog_smp_panic(int cpu) { + static cpumask_t wd_smp_cpus_ipi; // protected by reporting unsigned long flags; + u64 tb, last_reset; int c; wd_smp_lock(&flags); /* Double check some things under lock */ - if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) + tb = get_tb(); + last_reset = wd_smp_last_reset_tb; + if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb) goto out; if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) goto out; - if (cpumask_weight(&wd_smp_cpus_pending) == 0) + if (!wd_try_report()) + goto out; + for_each_online_cpu(c) { + if (!cpumask_test_cpu(c, &wd_smp_cpus_pending)) + continue; + if (c == cpu) + continue; // should not happen + + __cpumask_set_cpu(c, &wd_smp_cpus_ipi); + if (set_cpu_stuck(c)) + break; + } + if (cpumask_empty(&wd_smp_cpus_ipi)) { + wd_end_reporting(); goto out; + } + wd_smp_unlock(&flags); pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + cpu, cpumask_pr_args(&wd_smp_cpus_ipi)); pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", - cpu, tb, wd_smp_last_reset_tb, - tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); + cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000); if (!sysctl_hardlockup_all_cpu_backtrace) { /* * Try to trigger the stuck CPUs, unless we are going to * get a backtrace on all of them anyway. */ - for_each_cpu(c, &wd_smp_cpus_pending) { - if (c == cpu) - continue; + for_each_cpu(c, &wd_smp_cpus_ipi) { smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); + __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); } - } - - /* Take the stuck CPUs out of the watch group */ - set_cpumask_stuck(&wd_smp_cpus_pending, tb); - - wd_smp_unlock(&flags); - - if (sysctl_hardlockup_all_cpu_backtrace) + } else { trigger_allbutself_cpu_backtrace(); - - /* - * Force flush any remote buffers that might be stuck in IRQ context - * and therefore could not run their irq_work. 
- */ - printk_trigger_flush(); + cpumask_clear(&wd_smp_cpus_ipi); + } if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + wd_end_reporting(); + return; out: wd_smp_unlock(&flags); } -static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +static void wd_smp_clear_cpu_pending(int cpu) { if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { struct pt_regs *regs = get_irq_regs(); unsigned long flags; - wd_smp_lock(&flags); - pr_emerg("CPU %d became unstuck TB:%lld\n", - cpu, tb); + cpu, get_tb()); print_irqtrace_events(current); if (regs) show_regs(regs); else dump_stack(); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + + /* + * All other updates to wd_smp_cpus_pending are performed under + * wd_smp_lock. All of them are atomic except the case where the + * mask becomes empty and is reset. This will not happen here because + * cpu was tested to be in the bitmap (above), and a CPU only clears + * its own bit. _Except_ in the case where another CPU has detected a + * hard lockup on our CPU and takes us out of the pending mask. So in + * normal operation there will be no race here, no problem. + * + * In the lockup case, this atomic clear-bit vs a store that refills + * other bits in the accessed word will not be a problem. The bit clear + * is atomic so it will not cause the store to get lost, and the store + * will never set this bit so it will not overwrite the bit clear. The + * only way for a stuck CPU to return to the pending bitmap is to + * become unstuck itself. + */ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); @@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu) per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) - watchdog_smp_panic(cpu, tb); + watchdog_smp_panic(cpu); + + if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { + /* + * Something has called printk from NMI context. It might be + * stuck, so this triggers a flush that will get that + * printk output to the console.
+ * + * See wd_lockup_ipi. + */ + printk_trigger_flush(); + } } DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) @@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { + /* + * Taking wd_smp_lock here means it is a soft-NMI lock, which + * means we can't take any regular or irqsafe spin locks while + * holding this lock. This is why timers can't printk while + * holding the lock. + */ wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); return 0; } - set_cpu_stuck(cpu, tb); + if (!wd_try_report()) { + wd_smp_unlock(&flags); + /* Couldn't report, try again in 100ms */ + mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000); + return 0; + } + + set_cpu_stuck(cpu); + + wd_smp_unlock(&flags); pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); @@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) print_irqtrace_events(current); show_regs(regs); - wd_smp_unlock(&flags); + xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + + wd_end_reporting(); } + /* + * We are okay to change DEC in soft_nmi_interrupt because the masked + * handler has marked a DEC as pending, so the timer interrupt will be + * replayed as soon as local irqs are enabled again. + */ if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); @@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); } } EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -380,7 +517,7 @@ static void stop_watchdog(void *arg) cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); - wd_smp_clear_cpu_pending(cpu, get_tb()); + wd_smp_clear_cpu_pending(cpu); } static int stop_watchdog_on_cpu(unsigned int cpu) diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index a2242017e55f..8b68d9f91a03 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -185,7 +185,7 @@ void __init reserve_crashkernel(void) } } -int overlaps_crashkernel(unsigned long start, unsigned long size) +int __init overlaps_crashkernel(unsigned long start, unsigned long size) { return (start + size) > crashk_res.start && start <= crashk_res.end; } diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 66678518b938..635b5fc30b53 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -378,7 +378,7 @@ void default_machine_kexec(struct kimage *image) /* NOTREACHED */ } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Values we need to export to the second kernel via the device tree. 
*/ static unsigned long htab_base; static unsigned long htab_size; @@ -420,4 +420,4 @@ static int __init export_htab_values(void) return 0; } late_initcall(export_htab_values); -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 6b81c852feab..563e9989a5bf 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -296,7 +296,7 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) return ret; } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /** * add_htab_mem_range - Adds htab range to the given memory ranges list, * if it exists diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ff581d70f20c..f947b77386a9 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -69,6 +69,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE + select PPC_64S_HASH_MMU select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) help Support running unmodified book3s_64 and book3s_32 guest kernels @@ -130,6 +131,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING If unsure, say N. +config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND + bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT + depends on KVM_BOOK3S_HV_POSSIBLE + default !EXPERT + help + Old nested HV capable Linux guests have a bug where they don't + reflect the PMU in-use status of their L2 guest to the L0 host + while the L2 PMU registers are live. This can result in loss + of L2 PMU register state, causing perf to not work correctly in + L2 guests. + + Selecting this option for the L0 host implements a workaround for + those buggy L1s which saves the L2 state, at the cost of performance + in all nested-capable guest entry/exit. + config KVM_BOOKE_HV bool diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 983b8c18bc31..05e003eb5d90 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -374,11 +374,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) BEGIN_FTR_SECTION mtspr SPRN_DAWRX1,r10 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) - mtspr SPRN_PID,r10 /* - * Switch to host MMU mode + * Switch to host MMU mode (don't have the real host PID but we aren't + * going back to userspace). */ + hwsync + isync + + mtspr SPRN_PID,r10 + ld r10, HSTATE_KVM_VCPU(r13) ld r10, VCPU_KVM(r10) lwz r10, KVM_HOST_LPID(r10) @@ -389,6 +394,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) ld r10, KVM_HOST_LPCR(r10) mtspr SPRN_LPCR,r10 + isync + /* * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear * MSR_RI in r12 ([H]SRR1) so the handler won't try to return. 
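For reference, the ordering discipline introduced in the book3s_64_entry.S hunk above, and in the __kvmhv_copy_tofrom_guest_radix hunk below, follows one pattern: hwsync orders prior storage accesses, isync context-synchronises, then the LPID/PID move is completed with another isync before any access in the new translation context. A minimal C sketch of that pattern, assuming the kernel's mtspr()/isync() helpers (asm/reg.h, asm/synch.h) and the SPRN_LPID/SPRN_PID definitions; the function name is hypothetical and not part of this patch:

/* Illustrative sketch only -- not part of the patch. */
static inline void enter_quadrant_context_sketch(unsigned long lpid, unsigned long pid)
{
	asm volatile("hwsync" ::: "memory");	/* order prior storage accesses */
	isync();				/* context synchronise before the move */
	mtspr(SPRN_LPID, lpid);			/* lpid first when entering the guest context */
	mtspr(SPRN_PID, pid);
	isync();				/* complete the switch before accesses in the new context */
}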
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 16359525a40f..8cebe5542256 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -57,6 +57,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, preempt_disable(); + asm volatile("hwsync" ::: "memory"); + isync(); /* switch the lpid first to avoid running host with unallocated pid */ old_lpid = mfspr(SPRN_LPID); if (old_lpid != lpid) @@ -75,6 +77,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, ret = __copy_to_user_inatomic((void __user *)to, from, n); pagefault_enable(); + asm volatile("hwsync" ::: "memory"); + isync(); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) mtspr(SPRN_PID, old_pid); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7b74fc0a986b..f64e45d6c0f4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -80,6 +80,7 @@ #include <asm/plpar_wrappers.h> #include "book3s.h" +#include "book3s_hv.h" #define CREATE_TRACE_POINTS #include "trace_hv.h" @@ -127,11 +128,6 @@ static bool nested = true; module_param(nested, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)"); -static inline bool nesting_enabled(struct kvm *kvm) -{ - return kvm->arch.nested_enable && kvm_is_radix(kvm); -} - static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * they should never fail.) */ -static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); - vc->preempt_tb = mftb(); + vc->preempt_tb = tb; spin_unlock_irqrestore(&vc->stoltb_lock, flags); } -static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); if (vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; + vc->stolen_tb += tb - vc->preempt_tb; vc->preempt_tb = TB_NIL; } spin_unlock_irqrestore(&vc->stoltb_lock, flags); @@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); /* * We can test vc->runner without taking the vcore lock, @@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) * ever sets it to NULL. 
*/ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_end_stolen(vc); + kvmppc_core_end_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { - vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; + vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt; vcpu->arch.busy_preempt = TB_NIL; } spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) - vcpu->arch.busy_preempt = mftb(); + vcpu->arch.busy_preempt = now; spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } @@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) u64 p; unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && @@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) return p; } -static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - struct kvmppc_vcore *vc) +static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + unsigned int pcpu, u64 now, + unsigned long stolen) { struct dtl_entry *dt; struct lppaca *vpa; - unsigned long stolen; - unsigned long core_stolen; - u64 now; - unsigned long flags; dt = vcpu->arch.dtl_ptr; vpa = vcpu->arch.vpa.pinned_addr; - now = mftb(); - core_stolen = vcore_stolen_time(vc, now); - stolen = core_stolen - vcpu->arch.stolen_logged; - vcpu->arch.stolen_logged = core_stolen; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - stolen += vcpu->arch.busy_stolen; - vcpu->arch.busy_stolen = 0; - spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + if (!dt || !vpa) return; - memset(dt, 0, sizeof(struct dtl_entry)); + dt->dispatch_reason = 7; - dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); - dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->preempt_reason = 0; + dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid); dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->ready_to_enqueue_time = 0; + dt->waiting_to_ready_time = 0; + dt->timebase = cpu_to_be64(now); + dt->fault_addr = 0; dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); + ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; @@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) +{ + unsigned long stolen; + unsigned long core_stolen; + u64 now; + unsigned long flags; + + now = mftb(); + + core_stolen = vcore_stolen_time(vc, now); + stolen = core_stolen - vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = core_stolen; + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); + stolen += vcpu->arch.busy_stolen; + vcpu->arch.busy_stolen = 0; + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + + __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen); +} + /* See if there is a doorbell interrupt pending for a 
vcpu */ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) { @@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) if (vcpu->arch.doorbell_request) return true; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return false; /* * Ensure that the read of vcore->dpdes comes after the read * of vcpu->doorbell_request. This barrier matches the @@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) * mode handler is not called but no other threads are in the * source vcore. */ - - spin_lock(&vcore->lock); - if (target->arch.state == KVMPPC_VCPU_RUNNABLE && - vcore->vcore_state != VCORE_INACTIVE && - vcore->runner) - target = vcore->runner; - spin_unlock(&vcore->lock); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE && + vcore->runner) + target = vcore->runner; + spin_unlock(&vcore->lock); + } return kvm_vcpu_yield_to(target); } @@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * If the lppaca had pmcregs_in_use clear when we exited the guest, then + * HFSCR_PM is cleared for next entry. If the guest then tries to access + * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM + * back in the guest HFSCR will cause the next entry to load the PMU SPRs and + * allow the guest access to continue. + */ +static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_PM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_PM; + + return RESUME_GUEST; +} + +static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_EBB; + + return RESUME_GUEST; +} + +static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_TM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_TM; + + return RESUME_GUEST; +} + static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; switch (vcpu->arch.trap) { /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + WARN_ON_ONCE(1); /* Should never happen */ + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + fallthrough; case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; r = RESUME_GUEST; @@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, unsigned long vsid; long err; - if (vcpu->arch.fault_dsisr == HDSISR_CANARY) { + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) && + unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) { r = RESUME_GUEST; /* Just retry if it's the canary */ break; } @@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * to emulate. * Otherwise, we just generate a program interrupt to the guest. 
*/ - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + r = EMULATE_FAIL; - if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && - cpu_has_feature(CPU_FTR_ARCH_300)) - r = kvmppc_emulate_doorbell_instr(vcpu); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cause == FSCR_MSGP_LG) + r = kvmppc_emulate_doorbell_instr(vcpu); + if (cause == FSCR_PM_LG) + r = kvmppc_pmu_unavailable(vcpu); + if (cause == FSCR_EBB_LG) + r = kvmppc_ebb_unavailable(vcpu); + if (cause == FSCR_TM_LG) + r = kvmppc_tm_unavailable(vcpu); + } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; @@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* These need to go to the nested HV */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + vcpu->stat.dec_exits++; + r = RESUME_HOST; + break; /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/ case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: @@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, * either vcore->dpdes or doorbell_request. * On POWER8, doorbell_request is 0. */ - *val = get_reg_val(id, vcpu->arch.vcore->dpdes | - vcpu->arch.doorbell_request); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *val = get_reg_val(id, vcpu->arch.doorbell_request); + else + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; case KVM_REG_PPC_VTB: *val = get_reg_val(id, vcpu->arch.vcore->vtb); @@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; case KVM_REG_PPC_DEC_EXPIRY: - *val = get_reg_val(id, vcpu->arch.dec_expires + - vcpu->arch.vcore->tb_offset); + *val = get_reg_val(id, vcpu->arch.dec_expires); break; case KVM_REG_PPC_ONLINE: *val = get_reg_val(id, vcpu->arch.online); @@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.pspb = set_reg_val(id, *val); break; case KVM_REG_PPC_DPDES: - vcpu->arch.vcore->dpdes = set_reg_val(id, *val); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1; + else + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: vcpu->arch.vcore->vtb = set_reg_val(id, *val); @@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DEC_EXPIRY: - vcpu->arch.dec_expires = set_reg_val(id, *val) - - vcpu->arch.vcore->tb_offset; + vcpu->arch.dec_expires = set_reg_val(id, *val); break; case KVM_REG_PPC_ONLINE: i = set_reg_val(id, *val); @@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) #endif #endif vcpu->arch.mmcr[0] = MMCR0_FC; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT; + vcpu->arch.mmcra = MMCRA_BHRB_DISABLE; + } + vcpu->arch.ctrl = CTRL_RUNLATCH; /* default to host PVR, since we can't spoof it */ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); @@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + /* + * PM, EBB, TM are demand-faulted so start with it clear. 
+ */ + vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM); + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) unsigned long dec_nsec, now; now = get_tb(); - if (now > vcpu->arch.dec_expires) { + if (now > kvmppc_dec_expires_host_tb(vcpu)) { /* decrementer has already gone negative */ kvmppc_core_queue_dec(vcpu); kvmppc_core_prepare_to_enter(vcpu); return; } - dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); + dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now); hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } @@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu, u64 tb) { u64 now; if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; spin_lock_irq(&vcpu->arch.tbacct_lock); - now = mftb(); + now = tb; vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - vcpu->arch.stolen_logged; vcpu->arch.busy_preempt = now; @@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu) tpaca->kvm_hstate.kvm_split_mode = NULL; } +static DEFINE_PER_CPU(struct kvm *, cpu_in_guest); + static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *nested = vcpu->arch.nested; - cpumask_t *cpu_in_guest; + cpumask_t *need_tlb_flush; int i; + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + cpu = cpu_first_tlb_thread_sibling(cpu); - if (nested) { - cpumask_set_cpu(cpu, &nested->need_tlb_flush); - cpu_in_guest = &nested->cpu_in_guest; - } else { - cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); - cpu_in_guest = &kvm->arch.cpu_in_guest; - } + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + cpumask_set_cpu(i, need_tlb_flush); + /* - * Make sure setting of bit in need_tlb_flush precedes - * testing of cpu_in_guest bits. The matching barrier on - * the other side is the first smp_mb() in kvmppc_run_core(). + * Make sure setting of bit in need_tlb_flush precedes testing of + * cpu_in_guest. The matching barrier on the other side is hwsync + * when switching to guest MMU mode, which happens between + * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit + * being tested. */ smp_mb(); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); - i += cpu_tlb_thread_sibling_step()) - if (cpumask_test_cpu(i, cpu_in_guest)) + i += cpu_tlb_thread_sibling_step()) { + struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i); + + if (running == kvm) smp_call_function_single(i, do_nothing, NULL, 1); + } +} + +static void do_migrate_away_vcpu(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct kvm *kvm = vcpu->kvm; + + /* + * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync; + * ptesync sequence on the old CPU before migrating to a new one, in + * case we interrupted the guest between a tlbie ; eieio ; + * tlbsync; ptesync sequence. + * + * Otherwise, ptesync is sufficient for ordering tlbiel sequences. + */ + if (kvm->arch.lpcr & LPCR_GTSE) + asm volatile("eieio; tlbsync; ptesync"); + else + asm volatile("ptesync"); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) * can move around between pcpus. 
To cope with this, when * a vcpu moves from one pcpu to another, we need to tell * any vcpus running on the same core as this vcpu previously - * ran to flush the TLB. The TLB is shared between threads, - * so we use a single bit in .need_tlb_flush for all 4 threads. + * ran to flush the TLB. */ if (prev_cpu != pcpu) { - if (prev_cpu >= 0 && - cpu_first_tlb_thread_sibling(prev_cpu) != - cpu_first_tlb_thread_sibling(pcpu)) - radix_flush_cpu(kvm, prev_cpu, vcpu); + if (prev_cpu >= 0) { + if (cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) + radix_flush_cpu(kvm, prev_cpu, vcpu); + + smp_call_function_single(prev_cpu, + do_migrate_away_vcpu, vcpu, 1); + } if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; else @@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; - struct kvm *kvm = vc->kvm; cpu = vc->pcpu; if (vcpu) { @@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) cpu += vcpu->arch.ptid; vcpu->cpu = vc->pcpu; vcpu->arch.thread_cpu = cpu; - cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; @@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); if (vc->num_threads < threads_per_vcore(vc->kvm)) { @@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) } /* Start accumulating stolen time */ - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp; - kvmppc_core_end_stolen(vc); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + + kvmppc_core_end_stolen(vc, mftb()); if (!list_empty(&vc->preempt_list)) { lp = &per_cpu(preempted_vcores, vc->pcpu); spin_lock(&lp->lock); @@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc) vcpu->arch.ret = RESUME_GUEST; else continue; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) list_del_init(&pvc->preempt_list); if (pvc->runner == NULL) { pvc->vcore_state = VCORE_INACTIVE; - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); } spin_unlock(&pvc->lock); continue; @@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&pvc->lock); continue; } - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); pvc->vcore_state = VCORE_PIGGYBACK; if (cip->total_threads >= target_threads) break; @@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) */ spin_unlock(&vc->lock); /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && + if (now < kvmppc_dec_expires_host_tb(vcpu) && kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); @@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) else ++still_running; } else { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool 
is_master) kvmppc_vcore_preempt(vc); } else if (vc->runner) { vc->vcore_state = VCORE_PREEMPT; - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } else { vc->vcore_state = VCORE_INACTIVE; } @@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } goto out; @@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); - cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); } spin_unlock(&vc->lock); @@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) trace_kvmppc_run_core(vc, 1); } -static void load_spr_state(struct kvm_vcpu *vcpu) -{ - mtspr(SPRN_DSCR, vcpu->arch.dscr); - mtspr(SPRN_IAMR, vcpu->arch.iamr); - mtspr(SPRN_PSPB, vcpu->arch.pspb); - mtspr(SPRN_FSCR, vcpu->arch.fscr); - mtspr(SPRN_TAR, vcpu->arch.tar); - mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); - mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); - mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_TIDR, vcpu->arch.tid); - mtspr(SPRN_AMR, vcpu->arch.amr); - mtspr(SPRN_UAMOR, vcpu->arch.uamor); - - /* - * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] - * clear (or hstate set appropriately to catch those registers - * being clobbered if we take a MCE or SRESET), so those are done - * later. - */ - - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); -} - -static void store_spr_state(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ctrl = mfspr(SPRN_CTRLF); - - vcpu->arch.iamr = mfspr(SPRN_IAMR); - vcpu->arch.pspb = mfspr(SPRN_PSPB); - vcpu->arch.fscr = mfspr(SPRN_FSCR); - vcpu->arch.tar = mfspr(SPRN_TAR); - vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); - vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); - vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.tid = mfspr(SPRN_TIDR); - vcpu->arch.amr = mfspr(SPRN_AMR); - vcpu->arch.uamor = mfspr(SPRN_UAMOR); - vcpu->arch.dscr = mfspr(SPRN_DSCR); -} - -/* - * Privileged (non-hypervisor) host registers to save. - */ -struct p9_host_os_sprs { - unsigned long dscr; - unsigned long tidr; - unsigned long iamr; - unsigned long amr; - unsigned long fscr; -}; - -static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) -{ - host_os_sprs->dscr = mfspr(SPRN_DSCR); - host_os_sprs->tidr = mfspr(SPRN_TIDR); - host_os_sprs->iamr = mfspr(SPRN_IAMR); - host_os_sprs->amr = mfspr(SPRN_AMR); - host_os_sprs->fscr = mfspr(SPRN_FSCR); -} - -/* vcpu guest regs must already be saved */ -static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - mtspr(SPRN_PSPB, 0); - mtspr(SPRN_UAMOR, 0); - - mtspr(SPRN_DSCR, host_os_sprs->dscr); - mtspr(SPRN_TIDR, host_os_sprs->tidr); - mtspr(SPRN_IAMR, host_os_sprs->iamr); - - if (host_os_sprs->amr != vcpu->arch.amr) - mtspr(SPRN_AMR, host_os_sprs->amr); - - if (host_os_sprs->fscr != vcpu->arch.fscr) - mtspr(SPRN_FSCR, host_os_sprs->fscr); - - /* Save guest CTRL register, set runlatch to 1 */ - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, 1); -} - static inline bool hcall_is_xics(unsigned long req) { return req == H_EOI || req == H_CPPR || req == H_IPI || req == H_IPOLL || req == H_XIRR || req == H_XIRR_X; } -/* - * Guest entry for POWER9 and later CPUs. 
- */ -static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, - unsigned long lpcr) +static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) +{ + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + if (lp) { + u32 yield_count = be32_to_cpu(lp->yield_count) + 1; + lp->yield_count = cpu_to_be32(yield_count); + vcpu->arch.vpa.dirty = 1; + } +} + +/* call our hypervisor to load up HV regs and go */ +static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + unsigned long host_psscr; + unsigned long msr; + struct hv_guest_state hvregs; struct p9_host_os_sprs host_os_sprs; s64 dec; - u64 tb; - int trap, save_pmu; - - WARN_ON_ONCE(vcpu->arch.ceded); + int trap; - dec = mfspr(SPRN_DEC); - tb = mftb(); - if (dec < 0) - return BOOK3S_INTERRUPT_HV_DECREMENTER; - local_paca->kvm_hstate.dec_expires = dec + tb; - if (local_paca->kvm_hstate.dec_expires < time_limit) - time_limit = local_paca->kvm_hstate.dec_expires; + msr = mfmsr(); save_p9_host_os_sprs(&host_os_sprs); - kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */ + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_vcpu_entry_p9() below otherwise. + */ + host_psscr = mfspr(SPRN_PSSCR_PR); - kvmppc_subcore_enter_guest(); + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) + return 0; - vc->entry_exit_map = 1; - vc->in_guest = 1; + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) + msr = mfmsr(); /* TM restore can update msr */ - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - } - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); + if (vcpu->arch.psscr != host_psscr) + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; - } else { - get_lppaca()->pmcregs_in_use = 1; - } - barrier(); + kvmhv_save_hv_regs(vcpu, &hvregs); + hvregs.lpcr = lpcr; + vcpu->arch.regs.msr = vcpu->arch.shregs.msr; + hvregs.version = HV_GUEST_STATE_VERSION; + if (vcpu->arch.nested) { + hvregs.lpid = vcpu->arch.nested->shadow_lpid; + hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; + } else { + hvregs.lpid = vcpu->kvm->arch.lpid; + hvregs.vcpu_token = vcpu->vcpu_id; } -#endif - kvmhv_load_guest_pmu(vcpu); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - load_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - load_vr_state(&vcpu->arch.vr); -#endif - mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); - - load_spr_state(vcpu); + hvregs.hdec_expiry = time_limit; /* - * When setting DEC, we must always deal with irq_work_raise via NMI vs - * setting DEC. The problem occurs right as we switch into guest mode - * if a NMI hits and sets pending work and sets DEC, then that will - * apply to the guest and not bring us back to the host. + * When setting DEC, we must always deal with irq_work_raise + * via NMI vs setting DEC. 
The problem occurs right as we + * switch into guest mode if a NMI hits and sets pending work + * and sets DEC, then that will apply to the guest and not + * bring us back to the host. * - * irq_work_raise could check a flag (or possibly LPCR[HDICE] for - * example) and set HDEC to 1? That wouldn't solve the nested hv - * case which needs to abort the hcall or zero the time limit. + * irq_work_raise could check a flag (or possibly LPCR[HDICE] + * for example) and set HDEC to 1? That wouldn't solve the + * nested hv case which needs to abort the hcall or zero the + * time limit. * * XXX: Another day's problem. */ - mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); + mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb); + + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + switch_pmu_to_guest(vcpu, &host_os_sprs); + trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), + __pa(&vcpu->arch.regs)); + kvmhv_restore_hv_return_state(vcpu, &hvregs); + switch_pmu_to_host(vcpu, &host_os_sprs); + vcpu->arch.shregs.msr = vcpu->arch.regs.msr; + vcpu->arch.shregs.dar = mfspr(SPRN_DAR); + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + + store_vcpu_state(vcpu); - if (kvmhv_on_pseries()) { - /* - * We need to save and restore the guest visible part of the - * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor - * doesn't do this for us. Note only required if pseries since - * this is done in kvmhv_vcpu_entry_p9() below otherwise. - */ - unsigned long host_psscr; - /* call our hypervisor to load up HV regs and go */ - struct hv_guest_state hvregs; + dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; + *tb = mftb(); + vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset); - host_psscr = mfspr(SPRN_PSSCR_PR); - mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); - kvmhv_save_hv_regs(vcpu, &hvregs); - hvregs.lpcr = lpcr; - vcpu->arch.regs.msr = vcpu->arch.shregs.msr; - hvregs.version = HV_GUEST_STATE_VERSION; - if (vcpu->arch.nested) { - hvregs.lpid = vcpu->arch.nested->shadow_lpid; - hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; - } else { - hvregs.lpid = vcpu->kvm->arch.lpid; - hvregs.vcpu_token = vcpu->vcpu_id; - } - hvregs.hdec_expiry = time_limit; - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); - trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), - __pa(&vcpu->arch.regs)); - kvmhv_restore_hv_return_state(vcpu, &hvregs); - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - vcpu->arch.shregs.dar = mfspr(SPRN_DAR); - vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); - vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + timer_rearm_host_dec(*tb); + + restore_p9_host_os_sprs(vcpu, &host_os_sprs); + if (vcpu->arch.psscr != host_psscr) mtspr(SPRN_PSSCR_PR, host_psscr); + return trap; +} + +/* + * Guest entry for POWER9 and later CPUs. 
+ */ +static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, + unsigned long lpcr, u64 *tb) +{ + u64 next_timer; + int trap; + + next_timer = timer_get_next_tb(); + if (*tb >= next_timer) + return BOOK3S_INTERRUPT_HV_DECREMENTER; + if (next_timer < time_limit) + time_limit = next_timer; + else if (*tb >= time_limit) /* nested time limit */ + return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER; + + vcpu->arch.ceded = 0; + + vcpu_vpa_increment_dispatch(vcpu); + + if (kvmhv_on_pseries()) { + trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); + /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { @@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } + } else { + struct kvm *kvm = vcpu->kvm; + kvmppc_xive_push_vcpu(vcpu); - trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr); + + __this_cpu_write(cpu_in_guest, kvm); + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); + __this_cpu_write(cpu_in_guest, NULL); + if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && !(vcpu->arch.shregs.msr & MSR_PR)) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, } kvmppc_xive_pull_vcpu(vcpu); - if (kvm_is_radix(vcpu->kvm)) + if (kvm_is_radix(kvm)) vcpu->arch.slb_max = 0; } - dec = mfspr(SPRN_DEC); - if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ - dec = (s32) dec; - tb = mftb(); - vcpu->arch.dec_expires = dec + tb; - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; - - store_spr_state(vcpu); - - restore_p9_host_os_sprs(vcpu, &host_os_sprs); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - store_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - store_vr_state(&vcpu->arch.vr); -#endif - vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true); - - save_pmu = 1; - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - save_pmu = lp->pmcregs_in_use; - } - /* Must save pmu if this guest is capable of running nested guests */ - save_pmu |= nesting_enabled(vcpu->kvm); - - kvmhv_save_guest_pmu(vcpu, save_pmu); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); - barrier(); - } -#endif - - vc->entry_exit_map = 0x101; - vc->in_guest = 0; - - mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); - mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); - - kvmhv_load_host_pmu(); - - kvmppc_subcore_exit_guest(); + vcpu_vpa_increment_dispatch(vcpu); return trap; } @@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) return false; } +static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) + return true; + return false; +} + /* * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded @@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) int i; for_each_runnable_thread(i, vcpu, vc) { - if (!vcpu->arch.ceded || 
kvmppc_vcpu_woken(vcpu)) + if (kvmppc_vcpu_check_block(vcpu)) return 1; } @@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) int do_sleep = 1; u64 block_ns; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); if (vc->halt_poll_ns) { @@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { - kvmppc_remove_runnable(vc, v); + kvmppc_remove_runnable(vc, v, mftb()); v->stat.signal_exits++; v->run->exit_reason = KVM_EXIT_INTR; v->arch.ret = -EINTR; @@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) kvmppc_vcore_end_preempt(vc); if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; @@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); struct kvm_run *run = vcpu->run; int trap, r, pcpu; int srcu_idx; struct kvmppc_vcore *vc; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; + unsigned long flags; + u64 tb; trace_kvmppc_run_vcpu_enter(vcpu); @@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; - vcpu->arch.busy_preempt = TB_NIL; vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; - vc->runnable_threads[0] = vcpu; - vc->n_runnable = 1; - vc->runner = vcpu; /* See if the MMU is ready to go */ - if (!kvm->arch.mmu_ready) { + if (unlikely(!kvm->arch.mmu_ready)) { r = kvmhv_setup_mmu(vcpu); if (r) { run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_update_vpas(vcpu); - init_vcore_to_run(vc); - vc->preempt_tb = TB_NIL; - preempt_disable(); pcpu = smp_processor_id(); - vc->pcpu = pcpu; if (kvm_is_radix(kvm)) kvmppc_prepare_radix_vcpu(vcpu, pcpu); - local_irq_disable(); - hard_irq_disable(); + /* flags save not required, but irq_pmu has no disable/enable API */ + powerpc_local_irq_pmu_save(flags); + if (signal_pending(current)) goto sigpend; - if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready) + if (need_resched() || !kvm->arch.mmu_ready) goto out; if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (vcpu->arch.doorbell_request) { - vc->dpdes = 1; - smp_wmb(); - vcpu->arch.doorbell_request = 0; - } if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions)) lpcr |= LPCR_MER; @@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, goto out; } - kvmppc_clear_host_core(pcpu); + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } - local_paca->kvm_hstate.napping = 0; - local_paca->kvm_hstate.kvm_split_mode = NULL; - kvmppc_start_thread(vcpu, vc); - kvmppc_create_dtl_entry(vcpu, vc); - trace_kvm_guest_enter(vcpu); + tb = mftb(); - vc->vcore_state = VCORE_RUNNING; - trace_kvmppc_run_core(vc, 0); + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = 
vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); + + trace_kvm_guest_enter(vcpu); guest_enter_irqoff(); @@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, /* Tell lockdep that we're about to enable interrupts */ trace_hardirqs_on(); - trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb); vcpu->arch.trap = trap; trace_hardirqs_off(); @@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, set_irq_happened(trap); - kvmppc_set_host_core(pcpu); - context_tracking_guest_exit(); if (!vtime_accounting_enabled_this_cpu()) { local_irq_enable(); @@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vtime_account_guest_exit(); - local_irq_enable(); + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; - cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); @@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * by L2 and the L1 decrementer is provided in hdec_expires */ if (kvmppc_core_pending_dec(vcpu) && - ((get_tb() < vcpu->arch.dec_expires) || + ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); @@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vcpu->arch.ret = r; - if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded && - !kvmppc_vcpu_woken(vcpu)) { + if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) { kvmppc_set_timer(vcpu); - while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { + + prepare_to_rcuwait(wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; break; } - spin_lock(&vc->lock); - kvmppc_vcore_blocked(vc); - spin_unlock(&vc->lock); + + if (kvmppc_vcpu_check_block(vcpu)) + break; + + trace_kvmppc_vcore_blocked(vc, 0); + schedule(); + trace_kvmppc_vcore_blocked(vc, 1); } + finish_rcuwait(wait); } vcpu->arch.ceded = 0; - vc->vcore_state = VCORE_INACTIVE; - trace_kvmppc_run_core(vc, 1); - done: - kvmppc_remove_runnable(vc, vcpu); trace_kvmppc_run_vcpu_exit(vcpu); return vcpu->arch.ret; @@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: - local_irq_enable(); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; } @@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; int r; int srcu_idx; - unsigned long ebb_regs[3] = {}; /* shut up GCC */ - unsigned long user_tar = 0; - unsigned int user_vrsave; struct kvm *kvm; + unsigned long msr; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* No need to go into the guest when all we'll do is come back out */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Don't allow entry with a suspended transaction, because * the guest entry/exit code will lose it. - * If the guest has TM enabled, save away their TM-related SPRs - * (they will get restored by the TM unavailable interrupt). 
*/ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && (current->thread.regs->msr & MSR_TM)) { if (MSR_TM_ACTIVE(current->thread.regs->msr)) { @@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } - /* Enable TM so we can read the TM SPRs */ - mtmsr(mfmsr() | MSR_TM); - current->thread.tm_tfhar = mfspr(SPRN_TFHAR); - current->thread.tm_tfiar = mfspr(SPRN_TFIAR); - current->thread.tm_texasr = mfspr(SPRN_TEXASR); - current->thread.regs->msr &= ~MSR_TM; } #endif @@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); - /* No need to go into the guest when all we'll do is come back out */ - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - kvm = vcpu->kvm; atomic_inc(&kvm->arch.vcpus_running); /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */ smp_mb(); - flush_all_to_thread(current); + msr = 0; + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr |= MSR_FP; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr |= MSR_VEC; + if (cpu_has_feature(CPU_FTR_VSX)) + msr |= MSR_VSX; + if ((cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && + (vcpu->arch.hfscr & HFSCR_TM)) + msr |= MSR_TM; + msr = msr_check_and_set(msr); - /* Save userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - ebb_regs[0] = mfspr(SPRN_EBBHR); - ebb_regs[1] = mfspr(SPRN_EBBRR); - ebb_regs[2] = mfspr(SPRN_BESCR); - user_tar = mfspr(SPRN_TAR); - } - user_vrsave = mfspr(SPRN_VRSAVE); + kvmppc_save_user_regs(); - vcpu->arch.waitp = &vcpu->arch.vcore->wait; + kvmppc_save_current_sprs(); + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); - /* Restore userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - mtspr(SPRN_EBBHR, ebb_regs[0]); - mtspr(SPRN_EBBRR, ebb_regs[1]); - mtspr(SPRN_BESCR, ebb_regs[2]); - mtspr(SPRN_TAR, user_tar); - } - mtspr(SPRN_VRSAVE, user_vrsave); - vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); @@ -4861,8 +4873,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, unsigned long npages = mem->memory_size >> PAGE_SHIFT; if (change == KVM_MR_CREATE) { - slot->arch.rmap = vzalloc(array_size(npages, - sizeof(*slot->arch.rmap))); + unsigned long size = array_size(npages, sizeof(*slot->arch.rmap)); + + if ((size >> PAGE_SHIFT) > totalram_pages()) + return -ENOMEM; + + slot->arch.rmap = vzalloc(size); if (!slot->arch.rmap) return -ENOMEM; } @@ -5072,6 +5088,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; + if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); kvmppc_rmap_reset(kvm); @@ -5081,8 +5099,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) kvm->arch.radix = 0; spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); - kvmppc_update_lpcr(kvm, LPCR_VPM1, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_VPM1; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + lpcr_mask |= LPCR_HAIL; + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5092,6 +5115,7 
@@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) */ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; int err; err = kvmppc_init_vm_radix(kvm); @@ -5103,8 +5127,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) kvm->arch.radix = 1; spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); - kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + lpcr_mask |= LPCR_HAIL; + if (cpu_has_feature(CPU_FTR_HVMODE) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; + } + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5126,6 +5159,9 @@ void kvmppc_alloc_host_rm_ops(void) int cpu, core; int size; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + /* Not the first time here ? */ if (kvmppc_host_rm_ops_hv != NULL) return; @@ -5268,6 +5304,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.mmu_ready = 1; lpcr &= ~LPCR_VPM1; lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_31) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; ret = kvmppc_init_vm_radix(kvm); if (ret) { kvmppc_free_lpid(kvm->arch.lpid); @@ -6063,9 +6103,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - r = kvm_init_subcore_bitmap(); - if (r) - return r; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + r = kvm_init_subcore_bitmap(); + if (r) + return r; + } /* * We need a way of accessing the XICS interrupt controller, diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h new file mode 100644 index 000000000000..6b7f07d9026b --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Privileged (non-hypervisor) host registers to save. 
+ */ +struct p9_host_os_sprs { + unsigned long iamr; + unsigned long amr; + + unsigned int pmc1; + unsigned int pmc2; + unsigned int pmc3; + unsigned int pmc4; + unsigned int pmc5; + unsigned int pmc6; + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcr2; + unsigned long mmcr3; + unsigned long mmcra; + unsigned long siar; + unsigned long sier1; + unsigned long sier2; + unsigned long sier3; + unsigned long sdar; +}; + +static inline bool nesting_enabled(struct kvm *kvm) +{ + return kvm->arch.nested_enable && kvm_is_radix(kvm); +} + +bool load_vcpu_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void store_vcpu_state(struct kvm_vcpu *vcpu); +void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs); +void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 70b7a8f97153..7d6d91338c3f 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -649,6 +649,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) int ext; unsigned long lpcr; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + /* Insert EXTERNAL bit into LPCR at the MER bit position */ ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1; lpcr = mfspr(SPRN_LPCR); @@ -682,60 +684,23 @@ static void flush_guest_tlb(struct kvm *kvm) unsigned long rb, set; rb = PPC_BIT(52); /* IS = 2 */ - if (kvm_is_radix(kvm)) { - /* R=1 PRS=1 RIC=2 */ + for (set = 0; set < kvm->arch.tlb_sets; ++set) { + /* R=0 PRS=0 RIC=0 */ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (1), "i" (1), "i" (2), + : : "r" (rb), "i" (0), "i" (0), "i" (0), "r" (0) : "memory"); - for (set = 1; set < kvm->arch.tlb_sets; ++set) { - rb += PPC_BIT(51); /* increment set number */ - /* R=1 PRS=1 RIC=0 */ - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (1), "i" (1), "i" (0), - "r" (0) : "memory"); - } - asm volatile("ptesync": : :"memory"); - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. - asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); - } else { - for (set = 0; set < kvm->arch.tlb_sets; ++set) { - /* R=0 PRS=0 RIC=0 */ - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (0), "i" (0), "i" (0), - "r" (0) : "memory"); - rb += PPC_BIT(51); /* increment set number */ - } - asm volatile("ptesync": : :"memory"); - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. - if (cpu_has_feature(CPU_FTR_ARCH_300)) - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + rb += PPC_BIT(51); /* increment set number */ } + asm volatile("ptesync": : :"memory"); } -void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, - struct kvm_nested_guest *nested) +void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu) { - cpumask_t *need_tlb_flush; - - /* - * On POWER9, individual threads can come in here, but the - * TLB is shared between the 4 threads in a core, hence - * invalidating on one thread invalidates for all. - * Thus we make all 4 threads use the same bit. 
- */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_tlb_thread_sibling(pcpu); - - if (nested) - need_tlb_flush = &nested->need_tlb_flush; - else - need_tlb_flush = &kvm->arch.need_tlb_flush; - - if (cpumask_test_cpu(pcpu, need_tlb_flush)) { + if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) { flush_guest_tlb(kvm); /* Clear the bit after the TLB flush */ - cpumask_clear_cpu(pcpu, need_tlb_flush); + cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush); } } EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush); diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index 9af660476314..1ec50c69678b 100644 --- a/arch/powerpc/kvm/book3s_hv_hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void) /* * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. + * been loaded yet and hence no guests are running, or running + * on POWER9 or newer CPU. + * * If no KVM is in use, no need to co-ordinate among threads * as all of them will always be in host and no one is going * to modify TB other than the opal hmi handler. + * + * POWER9 and newer don't need this synchronisation. + * * Hence, just return from here. */ if (!local_paca->sibling_subcore_state) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 4444f83cb133..59d89e4b154a 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtlr r0 blr -_GLOBAL(kvmhv_save_host_pmu) +/* + * void kvmhv_save_host_pmu(void) + */ +kvmhv_save_host_pmu: BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ li r3, -1 @@ -138,14 +141,6 @@ BEGIN_FTR_SECTION std r8, HSTATE_MMCR2(r13) std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - mfspr r5, SPRN_MMCR3 - mfspr r6, SPRN_SIER2 - mfspr r7, SPRN_SIER3 - std r5, HSTATE_MMCR3(r13) - std r6, HSTATE_SIER2(r13) - std r7, HSTATE_SIER3(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index ed8a2c9f5629..a2e34efb8d31 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* convert TB values/offsets to host (L0) values */ hdec_exp = l2_hv.hdec_expiry - vc->tb_offset; vc->tb_offset += l2_hv.tb_offset; + vcpu->arch.dec_expires += l2_hv.tb_offset; /* set L1 state to L2 state */ vcpu->arch.nested = l2; @@ -374,11 +375,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; do { - if (mftb() >= hdec_exp) { - vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; - r = RESUME_HOST; - break; - } r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); @@ -399,6 +395,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_regs.msr & MSR_TS_MASK) vcpu->arch.shregs.msr |= MSR_TS_S; vc->tb_offset = saved_l1_hv.tb_offset; + /* XXX: is this always the same delta as saved_l1_hv.tb_offset? 
*/ + vcpu->arch.dec_expires -= l2_hv.tb_offset; restore_hv_regs(vcpu, &saved_l1_hv); vcpu->arch.purr += delta_purr; vcpu->arch.spurr += delta_spurr; @@ -582,7 +580,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 961b3d70483c..a28e5b3daabd 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -4,8 +4,439 @@ #include <asm/asm-prototypes.h> #include <asm/dbell.h> #include <asm/kvm_ppc.h> +#include <asm/pmc.h> #include <asm/ppc-opcode.h> +#include "book3s_hv.h" + +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) +{ + if (!(mmcr0 & MMCR0_FC)) + goto do_freeze; + if (mmcra & MMCRA_SAMPLE_ENABLE) + goto do_freeze; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (!(mmcr0 & MMCR0_PMCCEXT)) + goto do_freeze; + if (!(mmcra & MMCRA_BHRB_DISABLE)) + goto do_freeze; + } + return; + +do_freeze: + mmcr0 = MMCR0_FC; + mmcra = 0; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mmcr0 |= MMCR0_PMCCEXT; + mmcra = MMCRA_BHRB_DISABLE; + } + + mtspr(SPRN_MMCR0, mmcr0); + mtspr(SPRN_MMCRA, mmcra); + isync(); +} + +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int load_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + load_pmu = lp->pmcregs_in_use; + + /* Save host */ + if (ppc_get_pmu_inuse()) { + /* + * It might be better to put PMU handling (at least for the + * host) in the perf subsystem because it knows more about what + * is being used. + */ + + /* POWER9, POWER10 do not implement HPMC or SPMC */ + + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); + host_os_sprs->mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); + + host_os_sprs->pmc1 = mfspr(SPRN_PMC1); + host_os_sprs->pmc2 = mfspr(SPRN_PMC2); + host_os_sprs->pmc3 = mfspr(SPRN_PMC3); + host_os_sprs->pmc4 = mfspr(SPRN_PMC4); + host_os_sprs->pmc5 = mfspr(SPRN_PMC5); + host_os_sprs->pmc6 = mfspr(SPRN_PMC6); + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); + host_os_sprs->sdar = mfspr(SPRN_SDAR); + host_os_sprs->siar = mfspr(SPRN_SIAR); + host_os_sprs->sier1 = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); + host_os_sprs->sier2 = mfspr(SPRN_SIER2); + host_os_sprs->sier3 = mfspr(SPRN_SIER3); + } + } + +#ifdef CONFIG_PPC_PSERIES + /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = load_pmu; + barrier(); + } +#endif + + /* + * Load guest. If the VPA said the PMCs are not in use but the guest + * tried to access them anyway, HFSCR[PM] will be set by the HFAC + * fault so we can make forward progress. 
+ */ + if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); + mtspr(SPRN_SDAR, vcpu->arch.sdar); + mtspr(SPRN_SIAR, vcpu->arch.siar); + mtspr(SPRN_SIER, vcpu->arch.sier[0]); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); + mtspr(SPRN_SIER2, vcpu->arch.sier[1]); + mtspr(SPRN_SIER3, vcpu->arch.sier[2]); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, vcpu->arch.mmcra); + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); + /* No isync necessary because we're starting counters */ + + if (!vcpu->arch.nested && + (vcpu->arch.hfscr_permitted & HFSCR_PM)) + vcpu->arch.hfscr |= HFSCR_PM; + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_guest); + +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int save_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + save_pmu = lp->pmcregs_in_use; + if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { + /* + * Save pmu if this guest is capable of running nested guests. + * This is option is for old L1s that do not set their + * lppaca->pmcregs_in_use properly when entering their L2. + */ + save_pmu |= nesting_enabled(vcpu->kvm); + } + + if (save_pmu) { + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); + vcpu->arch.mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); + + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); + vcpu->arch.sdar = mfspr(SPRN_SDAR); + vcpu->arch.siar = mfspr(SPRN_SIAR); + vcpu->arch.sier[0] = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); + vcpu->arch.sier[1] = mfspr(SPRN_SIER2); + vcpu->arch.sier[2] = mfspr(SPRN_SIER3); + } + + } else if (vcpu->arch.hfscr & HFSCR_PM) { + /* + * The guest accessed PMC SPRs without specifying they should + * be preserved, or it cleared pmcregs_in_use after the last + * access. Just ensure they are frozen. + */ + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); + + /* + * Demand-fault PMU register access in the guest. + * + * This is used to grab the guest's VPA pmcregs_in_use value + * and reflect it into the host's VPA in the case of a nested + * hypervisor. + * + * It also avoids having to zero-out SPRs after each guest + * exit to avoid side-channels when. + * + * This is cleared here when we exit the guest, so later HFSCR + * interrupt handling can add it back to run the guest with + * PM enabled next time. 
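(For illustration only: the demand-fault policy described in the comment above — grant HFSCR[PM] when the guest takes a facility-unavailable fault, then drop it again at exit unless the VPA advertises the PMU registers as live — can be modelled outside the kernel. A minimal stand-alone sketch follows; all struct and function names here are hypothetical, not the kernel's API.)

	#include <stdbool.h>
	#include <stdio.h>

	#define HFSCR_PM	0x1UL

	struct vcpu_model {
		unsigned long hfscr;		/* facility bits currently granted */
		unsigned long hfscr_permitted;	/* facility bits the host allows   */
		bool pmcregs_in_use;		/* what the guest's VPA advertises */
	};

	/* Guest touched a PMU SPR while HFSCR[PM]=0: grant it for this run. */
	static void hfac_fault_pm(struct vcpu_model *v)
	{
		if (v->hfscr_permitted & HFSCR_PM)
			v->hfscr |= HFSCR_PM;
	}

	/* Guest exit: keep PM only if the VPA says the registers are live. */
	static void vcpu_exit(struct vcpu_model *v)
	{
		if (!v->pmcregs_in_use)
			v->hfscr &= ~HFSCR_PM;	/* next access faults again */
	}

	int main(void)
	{
		struct vcpu_model v = { .hfscr_permitted = HFSCR_PM };

		hfac_fault_pm(&v);
		printf("after fault: PM=%lu\n", v.hfscr & HFSCR_PM);
		vcpu_exit(&v);
		printf("after exit:  PM=%lu\n", v.hfscr & HFSCR_PM);
		return 0;
	}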
+ */ + if (!vcpu->arch.nested) + vcpu->arch.hfscr &= ~HFSCR_PM; + } /* otherwise the PMU should still be frozen */ + +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif + + if (ppc_get_pmu_inuse()) { + mtspr(SPRN_PMC1, host_os_sprs->pmc1); + mtspr(SPRN_PMC2, host_os_sprs->pmc2); + mtspr(SPRN_PMC3, host_os_sprs->pmc3); + mtspr(SPRN_PMC4, host_os_sprs->pmc4); + mtspr(SPRN_PMC5, host_os_sprs->pmc5); + mtspr(SPRN_PMC6, host_os_sprs->pmc6); + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); + mtspr(SPRN_SDAR, host_os_sprs->sdar); + mtspr(SPRN_SIAR, host_os_sprs->siar); + mtspr(SPRN_SIER, host_os_sprs->sier1); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); + mtspr(SPRN_SIER2, host_os_sprs->sier2); + mtspr(SPRN_SIER3, host_os_sprs->sier3); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, host_os_sprs->mmcra); + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); + isync(); + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_host); + +static void load_spr_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + /* TAR is very fast */ + mtspr(SPRN_TAR, vcpu->arch.tar); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + current->thread.vrsave != vcpu->arch.vrsave) + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif + + if (vcpu->arch.hfscr & HFSCR_EBB) { + if (current->thread.ebbhr != vcpu->arch.ebbhr) + mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); + if (current->thread.ebbrr != vcpu->arch.ebbrr) + mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); + if (current->thread.bescr != vcpu->arch.bescr) + mtspr(SPRN_BESCR, vcpu->arch.bescr); + } + + if (cpu_has_feature(CPU_FTR_P9_TIDR) && + current->thread.tidr != vcpu->arch.tid) + mtspr(SPRN_TIDR, vcpu->arch.tid); + if (host_os_sprs->iamr != vcpu->arch.iamr) + mtspr(SPRN_IAMR, vcpu->arch.iamr); + if (host_os_sprs->amr != vcpu->arch.amr) + mtspr(SPRN_AMR, vcpu->arch.amr); + if (vcpu->arch.uamor != 0) + mtspr(SPRN_UAMOR, vcpu->arch.uamor); + if (current->thread.fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, vcpu->arch.fscr); + if (current->thread.dscr != vcpu->arch.dscr) + mtspr(SPRN_DSCR, vcpu->arch.dscr); + if (vcpu->arch.pspb != 0) + mtspr(SPRN_PSPB, vcpu->arch.pspb); + + /* + * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] + * clear (or hstate set appropriately to catch those registers + * being clobbered if we take a MCE or SRESET), so those are done + * later. 
+ */ + + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, 0); +} + +static void store_spr_state(struct kvm_vcpu *vcpu) +{ + vcpu->arch.tar = mfspr(SPRN_TAR); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif + + if (vcpu->arch.hfscr & HFSCR_EBB) { + vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); + vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); + vcpu->arch.bescr = mfspr(SPRN_BESCR); + } + + if (cpu_has_feature(CPU_FTR_P9_TIDR)) + vcpu->arch.tid = mfspr(SPRN_TIDR); + vcpu->arch.iamr = mfspr(SPRN_IAMR); + vcpu->arch.amr = mfspr(SPRN_AMR); + vcpu->arch.uamor = mfspr(SPRN_UAMOR); + vcpu->arch.fscr = mfspr(SPRN_FSCR); + vcpu->arch.dscr = mfspr(SPRN_DSCR); + vcpu->arch.pspb = mfspr(SPRN_PSPB); + + vcpu->arch.ctrl = mfspr(SPRN_CTRLF); +} + +/* Returns true if current MSR and/or guest MSR may have changed */ +bool load_vcpu_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + bool ret = false; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + unsigned long guest_msr = vcpu->arch.shregs.msr; + if (MSR_TM_ACTIVE(guest_msr)) { + kvmppc_restore_tm_hv(vcpu, guest_msr, true); + ret = true; + } else if (vcpu->arch.hfscr & HFSCR_TM) { + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFHAR, vcpu->arch.tfhar); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + } + } +#endif + + load_spr_state(vcpu, host_os_sprs); + + load_fp_state(&vcpu->arch.fp); +#ifdef CONFIG_ALTIVEC + load_vr_state(&vcpu->arch.vr); +#endif + + return ret; +} +EXPORT_SYMBOL_GPL(load_vcpu_state); + +void store_vcpu_state(struct kvm_vcpu *vcpu) +{ + store_spr_state(vcpu); + + store_fp_state(&vcpu->arch.fp); +#ifdef CONFIG_ALTIVEC + store_vr_state(&vcpu->arch.vr); +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + unsigned long guest_msr = vcpu->arch.shregs.msr; + if (MSR_TM_ACTIVE(guest_msr)) { + kvmppc_save_tm_hv(vcpu, guest_msr, true); + } else if (vcpu->arch.hfscr & HFSCR_TM) { + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + vcpu->arch.tfhar = mfspr(SPRN_TFHAR); + vcpu->arch.tfiar = mfspr(SPRN_TFIAR); + + if (!vcpu->arch.nested) { + vcpu->arch.load_tm++; /* see load_ebb comment */ + if (!vcpu->arch.load_tm) + vcpu->arch.hfscr &= ~HFSCR_TM; + } + } + } +#endif +} +EXPORT_SYMBOL_GPL(store_vcpu_state); + +void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) +{ + host_os_sprs->iamr = mfspr(SPRN_IAMR); + host_os_sprs->amr = mfspr(SPRN_AMR); +} +EXPORT_SYMBOL_GPL(save_p9_host_os_sprs); + +/* vcpu guest regs must already be saved */ +void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + /* + * current->thread.xxx registers must all be restored to host + * values before a potential context switch, othrewise the context + * switch itself will overwrite current->thread.xxx with the values + * from the guest SPRs. 
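(Aside, illustrative only: the load_tm++ counter just above and the matching load_ebb counter in the next hunk both follow the same pattern — a u8 use counter per facility that, when it wraps, revokes the facility bit so a guest that stopped using it stops paying the save/restore cost on every partition switch. A small stand-alone model with hypothetical names, not kernel code:)

	#include <stdint.h>
	#include <stdio.h>

	#define HFSCR_EBB	0x2UL

	struct facility_model {
		uint8_t load_ebb;	/* wraps every 256 partition switches */
		unsigned long hfscr;
	};

	static void partition_switch(struct facility_model *f)
	{
		if (!(f->hfscr & HFSCR_EBB))
			return;		/* nothing to save/restore */
		/* ... save/restore EBB registers here ... */
		f->load_ebb++;
		if (!f->load_ebb)	/* wrapped: revoke until the next HFAC fault */
			f->hfscr &= ~HFSCR_EBB;
	}

	int main(void)
	{
		struct facility_model f = { .hfscr = HFSCR_EBB };
		int i, switches_with_ebb = 0;

		for (i = 0; i < 1000; i++) {
			if (f.hfscr & HFSCR_EBB)
				switches_with_ebb++;
			partition_switch(&f);
		}
		printf("EBB state carried on %d of 1000 switches\n", switches_with_ebb);
		return 0;
	}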
+ */ + + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); + + if (cpu_has_feature(CPU_FTR_P9_TIDR) && + current->thread.tidr != vcpu->arch.tid) + mtspr(SPRN_TIDR, current->thread.tidr); + if (host_os_sprs->iamr != vcpu->arch.iamr) + mtspr(SPRN_IAMR, host_os_sprs->iamr); + if (vcpu->arch.uamor != 0) + mtspr(SPRN_UAMOR, 0); + if (host_os_sprs->amr != vcpu->arch.amr) + mtspr(SPRN_AMR, host_os_sprs->amr); + if (current->thread.fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, current->thread.fscr); + if (current->thread.dscr != vcpu->arch.dscr) + mtspr(SPRN_DSCR, current->thread.dscr); + if (vcpu->arch.pspb != 0) + mtspr(SPRN_PSPB, 0); + + /* Save guest CTRL register, set runlatch to 1 */ + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, 1); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + vcpu->arch.vrsave != current->thread.vrsave) + mtspr(SPRN_VRSAVE, current->thread.vrsave); +#endif + if (vcpu->arch.hfscr & HFSCR_EBB) { + if (vcpu->arch.bescr != current->thread.bescr) + mtspr(SPRN_BESCR, current->thread.bescr); + if (vcpu->arch.ebbhr != current->thread.ebbhr) + mtspr(SPRN_EBBHR, current->thread.ebbhr); + if (vcpu->arch.ebbrr != current->thread.ebbrr) + mtspr(SPRN_EBBRR, current->thread.ebbrr); + + if (!vcpu->arch.nested) { + /* + * This is like load_fp in context switching, turn off + * the facility after it wraps the u8 to try avoiding + * saving and restoring the registers each partition + * switch. + */ + vcpu->arch.load_ebb++; + if (!vcpu->arch.load_ebb) + vcpu->arch.hfscr &= ~HFSCR_EBB; + } + } + + if (vcpu->arch.tar != current->thread.tar) + mtspr(SPRN_TAR, current->thread.tar); +} +EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { @@ -56,10 +487,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator #define accumulate_time(vcpu, next) do {} while (0) #endif -static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev) +static inline u64 mfslbv(unsigned int idx) { - asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx)); - asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx)); + u64 slbev; + + asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx)); + + return slbev; +} + +static inline u64 mfslbe(unsigned int idx) +{ + u64 slbee; + + asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx)); + + return slbee; } static inline void mtslb(u64 slbee, u64 slbev) @@ -100,17 +543,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; /* - * All the isync()s are overkill but trivially follow the ISA - * requirements. Some can likely be replaced with justification - * comment for why they are not needed. + * Prior memory accesses to host PID Q3 must be completed before we + * start switching, and stores must be drained to avoid not-my-LPAR + * logic (see switch_mmu_to_host). */ + asm volatile("hwsync" ::: "memory"); isync(); mtspr(SPRN_LPID, lpid); - isync(); mtspr(SPRN_LPCR, lpcr); - isync(); mtspr(SPRN_PID, vcpu->arch.pid); - isync(); + /* + * isync not required here because we are HRFID'ing to guest before + * any guest context access, which is context synchronising. + */ } static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) @@ -120,25 +565,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpid = kvm->arch.lpid; + /* + * See switch_mmu_to_guest_radix. 
ptesync should not be required here + * even if the host is in HPT mode because speculative accesses would + * not cause RC updates (we are in real mode). + */ + asm volatile("hwsync" ::: "memory"); + isync(); mtspr(SPRN_LPID, lpid); mtspr(SPRN_LPCR, lpcr); mtspr(SPRN_PID, vcpu->arch.pid); for (i = 0; i < vcpu->arch.slb_max; i++) mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv); - - isync(); + /* + * isync not required here, see switch_mmu_to_guest_radix. + */ } static void switch_mmu_to_host(struct kvm *kvm, u32 pid) { + /* + * The guest has exited, so guest MMU context is no longer being + * non-speculatively accessed, but a hwsync is needed before the + * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained, + * so the not-my-LPAR tlbie logic does not overlook them. + */ + asm volatile("hwsync" ::: "memory"); isync(); mtspr(SPRN_PID, pid); - isync(); mtspr(SPRN_LPID, kvm->arch.host_lpid); - isync(); mtspr(SPRN_LPCR, kvm->arch.host_lpcr); - isync(); + /* + * isync is not required after the switch, because mtmsrd with L=0 + * is performed after this switch, which is context synchronising. + */ if (!radix_enabled()) slb_restore_bolted_realmode(); @@ -171,8 +632,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu) */ for (i = 0; i < vcpu->arch.slb_nr; i++) { u64 slbee, slbev; - mfslb(i, &slbee, &slbev); + + slbee = mfslbe(i); if (slbee & SLB_ESID_V) { + slbev = mfslbv(i); vcpu->arch.slb[nr].orige = slbee | i; vcpu->arch.slb[nr].origv = slbev; nr++; @@ -183,15 +646,128 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu) } } -int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) +static void flush_guest_tlb(struct kvm *kvm) { + unsigned long rb, set; + + rb = PPC_BIT(52); /* IS = 2 */ + if (kvm_is_radix(kvm)) { + /* R=1 PRS=1 RIC=2 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (2), + "r" (0) : "memory"); + for (set = 1; set < kvm->arch.tlb_sets; ++set) { + rb += PPC_BIT(51); /* increment set number */ + /* R=1 PRS=1 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (0), + "r" (0) : "memory"); + } + asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); + } else { + for (set = 0; set < kvm->arch.tlb_sets; ++set) { + /* R=0 PRS=0 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (0), "i" (0), "i" (0), + "r" (0) : "memory"); + rb += PPC_BIT(51); /* increment set number */ + } + asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + } +} + +static void check_need_tlb_flush(struct kvm *kvm, int pcpu, + struct kvm_nested_guest *nested) +{ + cpumask_t *need_tlb_flush; + bool all_set = true; + int i; + + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + + if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush))) + return; + + /* + * Individual threads can come in here, but the TLB is shared between + * the 4 threads in a core, hence invalidating on one thread + * invalidates for all, so only invalidate the first time (if all bits + * were set. The others must still execute a ptesync. + * + * If a race occurs and two threads do the TLB flush, that is not a + * problem, just sub-optimal. 
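(Illustration of the coordination described above; the actual sibling loop follows below. Four threads share one TLB, so only the first arriving thread — the one that still sees every sibling bit set — performs the flush, while the others only need a ptesync. A sequential user-space model with made-up names, not the kernel's cpumask API:)

	#include <stdbool.h>
	#include <stdio.h>

	#define THREADS_PER_CORE 4

	static bool need_flush[THREADS_PER_CORE];

	static void check_need_tlb_flush(int thread, int *flush_count)
	{
		bool all_set = true;
		int i;

		if (!need_flush[thread])
			return;

		for (i = 0; i < THREADS_PER_CORE; i++)
			if (!need_flush[i])
				all_set = false;

		if (all_set)
			(*flush_count)++;	/* stands in for flush_guest_tlb() */
		/* else: just order against the flusher (ptesync) */

		need_flush[thread] = false;	/* clear our bit afterwards */
	}

	int main(void)
	{
		int t, flushes = 0;

		for (t = 0; t < THREADS_PER_CORE; t++)
			need_flush[t] = true;
		for (t = 0; t < THREADS_PER_CORE; t++)
			check_need_tlb_flush(t, &flushes);

		printf("flushes performed: %d (expected 1)\n", flushes);
		return 0;
	}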
+ */ + for (i = cpu_first_tlb_thread_sibling(pcpu); + i <= cpu_last_tlb_thread_sibling(pcpu); + i += cpu_tlb_thread_sibling_step()) { + if (!cpumask_test_cpu(i, need_tlb_flush)) { + all_set = false; + break; + } + } + if (all_set) + flush_guest_tlb(kvm); + else + asm volatile("ptesync" ::: "memory"); + + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(pcpu, need_tlb_flush); +} + +unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr) +{ + unsigned long msr_needed = 0; + + msr &= ~MSR_EE; + + /* MSR bits may have been cleared by context switch so must recheck */ + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr_needed |= MSR_FP; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr_needed |= MSR_VEC; + if (cpu_has_feature(CPU_FTR_VSX)) + msr_needed |= MSR_VSX; + if ((cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && + (vcpu->arch.hfscr & HFSCR_TM)) + msr_needed |= MSR_TM; + + /* + * This could be combined with MSR[RI] clearing, but that expands + * the unrecoverable window. It would be better to cover unrecoverable + * with KVM bad interrupt handling rather than use MSR[RI] at all. + * + * Much more difficult and less worthwhile to combine with IR/DR + * disable. + */ + if ((msr & msr_needed) != msr_needed) { + msr |= msr_needed; + __mtmsrd(msr, 0); + } else { + __hard_irq_disable(); + } + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + return msr; +} +EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities); + +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) +{ + struct p9_host_os_sprs host_os_sprs; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; struct kvmppc_vcore *vc = vcpu->arch.vcore; - s64 hdec; - u64 tb, purr, spurr; + s64 hdec, dec; + u64 purr, spurr; u64 *exsave; - bool ri_set; int trap; unsigned long msr; unsigned long host_hfscr; @@ -199,11 +775,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc unsigned long host_dawr0; unsigned long host_dawrx0; unsigned long host_psscr; + unsigned long host_hpsscr; unsigned long host_pidr; unsigned long host_dawr1; unsigned long host_dawrx1; + unsigned long dpdes; - hdec = time_limit - mftb(); + hdec = time_limit - *tb; if (hdec < 0) return BOOK3S_INTERRUPT_HV_DECREMENTER; @@ -214,51 +792,84 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc vcpu->arch.ceded = 0; - if (vc->tb_offset) { - u64 new_tb = mftb() + vc->tb_offset; - mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); - vc->tb_offset_applied = vc->tb_offset; - } - - msr = mfmsr(); + /* Save MSR for restore, with EE clear. 
*/ + msr = mfmsr() & ~MSR_EE; host_hfscr = mfspr(SPRN_HFSCR); host_ciabr = mfspr(SPRN_CIABR); - host_dawr0 = mfspr(SPRN_DAWR0); - host_dawrx0 = mfspr(SPRN_DAWRX0); - host_psscr = mfspr(SPRN_PSSCR); + host_psscr = mfspr(SPRN_PSSCR_PR); + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) + host_hpsscr = mfspr(SPRN_PSSCR); host_pidr = mfspr(SPRN_PID); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - host_dawr1 = mfspr(SPRN_DAWR1); - host_dawrx1 = mfspr(SPRN_DAWRX1); - } - if (vc->pcr) - mtspr(SPRN_PCR, vc->pcr | PCR_MASK); - mtspr(SPRN_DPDES, vc->dpdes); - mtspr(SPRN_VTB, vc->vtb); + if (dawr_enabled()) { + host_dawr0 = mfspr(SPRN_DAWR0); + host_dawrx0 = mfspr(SPRN_DAWRX0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } + } local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); + + save_p9_host_os_sprs(&host_os_sprs); + + msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) { + trap = 0; + goto out; + } + + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) + msr = mfmsr(); /* MSR may have been updated */ + + if (vc->tb_offset) { + u64 new_tb = *tb + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + *tb = new_tb; + vc->tb_offset_applied = vc->tb_offset; + } + + mtspr(SPRN_VTB, vc->vtb); mtspr(SPRN_PURR, vcpu->arch.purr); mtspr(SPRN_SPURR, vcpu->arch.spurr); + if (vc->pcr) + mtspr(SPRN_PCR, vc->pcr | PCR_MASK); + if (vcpu->arch.doorbell_request) { + vcpu->arch.doorbell_request = 0; + mtspr(SPRN_DPDES, 1); + } + if (dawr_enabled()) { - mtspr(SPRN_DAWR0, vcpu->arch.dawr0); - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (vcpu->arch.dawr0 != host_dawr0) + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); + if (vcpu->arch.dawrx0 != host_dawrx0) + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, vcpu->arch.dawr1); - mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + if (vcpu->arch.dawr1 != host_dawr1) + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + if (vcpu->arch.dawrx1 != host_dawrx1) + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); } } - mtspr(SPRN_CIABR, vcpu->arch.ciabr); - mtspr(SPRN_IC, vcpu->arch.ic); + if (vcpu->arch.ciabr != host_ciabr) + mtspr(SPRN_CIABR, vcpu->arch.ciabr); - mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + } else { + if (vcpu->arch.psscr != host_psscr) + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); + } mtspr(SPRN_HFSCR, vcpu->arch.hfscr); @@ -276,18 +887,34 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * HDSI which should correctly update the HDSISR the second time HDSI * entry. * - * Just do this on all p9 processors for now. + * The "radix prefetch bug" test can be used to test for this bug, as + * it also exists fo DD2.1 and below. 
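(Aside on the SPRN_TBU40 sequence a few lines above: writing TBU40 replaces only the upper 40 bits of the timebase while the low 24 bits keep counting, so if the low bits wrapped after new_tb was sampled, the written upper bits are one unit of 0x1000000 too small. Comparing the live low 24 bits against new_tb's low 24 bits detects the lost carry. A stand-alone model, where the "hardware" is just a variable:)

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t tb;				/* pretend timebase */

	static uint64_t mftb(void) { return tb; }

	static void mtspr_tbu40(uint64_t val)
	{
		tb = (val & ~0xffffffULL) | (tb & 0xffffffULL);
	}

	int main(void)
	{
		uint64_t new_tb, offset = 0x123456000000ULL;

		/* Low 24 bits are about to wrap between the read and the write. */
		tb = 0x0000000000fffff0ULL;
		new_tb = mftb() + offset;
		tb += 0x20;				/* time passes; low bits wrap */
		mtspr_tbu40(new_tb);

		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
			new_tb += 0x1000000;		/* carry lost: bump bit 24 */
			mtspr_tbu40(new_tb);
		}
		printf("timebase now %#llx (offset applied, carry preserved)\n",
		       (unsigned long long)mftb());
		return 0;
	}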
*/ - mtspr(SPRN_HDSISR, HDSISR_CANARY); + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + mtspr(SPRN_HDSISR, HDSISR_CANARY); mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); - mtspr(SPRN_AMOR, ~0UL); + /* + * It might be preferable to load_vcpu_state here, in order to get the + * GPR/FP register loads executing in parallel with the previous mtSPR + * instructions, but for now that can't be done because the TM handling + * in load_vcpu_state can change some SPRs and vcpu state (nip, msr). + * But TM could be split out if this would be a significant benefit. + */ - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9; + /* + * MSR[RI] does not need to be cleared (and is not, for radix guests + * with no prefetch bug), because in_guest is set. If we take a SRESET + * or MCE with in_guest set but still in HV mode, then + * kvmppc_p9_bad_interrupt handles the interrupt, which effectively + * clears MSR[RI] and doesn't return. + */ + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9); + barrier(); /* Open in_guest critical section */ /* * Hash host, hash guest, or radix guest with prefetch bug, all have @@ -299,17 +926,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc save_clear_host_mmu(kvm); - if (kvm_is_radix(kvm)) { + if (kvm_is_radix(kvm)) switch_mmu_to_guest_radix(kvm, vcpu, lpcr); - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) - __mtmsrd(0, 1); /* clear RI */ - - } else { + else switch_mmu_to_guest_hpt(kvm, vcpu, lpcr); - } /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */ - kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested); + check_need_tlb_flush(kvm, vc->pcpu, nested); /* * P9 suppresses the HDEC exception when LPCR[HDICE] = 0, @@ -317,6 +940,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); + mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_return_to_guest: #endif @@ -327,7 +952,9 @@ tm_return_to_guest: accumulate_time(vcpu, &vcpu->arch.guest_time); + switch_pmu_to_guest(vcpu, &host_os_sprs); kvmppc_p9_enter_guest(vcpu); + switch_pmu_to_host(vcpu, &host_os_sprs); accumulate_time(vcpu, &vcpu->arch.rm_intr); @@ -340,36 +967,27 @@ tm_return_to_guest: /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */ trap = local_paca->kvm_hstate.scratch0 & ~0x2; - /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */ - ri_set = false; - if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) { - if (trap != BOOK3S_INTERRUPT_SYSCALL && - (vcpu->arch.shregs.msr & MSR_RI)) - ri_set = true; + if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) exsave = local_paca->exgen; - } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) { + else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) exsave = local_paca->exnmi; - } else { /* trap == 0x200 */ + else /* trap == 0x200 */ exsave = local_paca->exmc; - } vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1; vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2; /* - * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1) - * and hstate scratch (which we need to move into exsave to make - * re-entrant vs SRESET/MCE) + * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate + * scratch (which we need to move into exsave to make re-entrant vs + * SRESET/MCE), register state is protected from reentrancy. 
However + * timebase, MMU, among other state is still set to guest, so don't + * enable MSR[RI] here. It gets enabled at the end, after in_guest + * is cleared. + * + * It is possible an NMI could come in here, which is why it is + * important to save the above state early so it can be debugged. */ - if (ri_set) { - if (unlikely(!(mfmsr() & MSR_RI))) { - __mtmsrd(MSR_RI, 1); - WARN_ON_ONCE(1); - } - } else { - WARN_ON_ONCE(mfmsr() & MSR_RI); - __mtmsrd(MSR_RI, 1); - } vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)]; vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)]; @@ -388,7 +1006,7 @@ tm_return_to_guest: kvmppc_realmode_machine_check(vcpu); } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { - kvmppc_realmode_hmi_handler(); + kvmppc_p9_realmode_hmi_handler(vcpu); } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { vcpu->arch.emul_inst = mfspr(SPRN_HEIR); @@ -427,13 +1045,6 @@ tm_return_to_guest: */ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); - - /* - * tm_return_to_guest re-loads SRR0/1, DAR, - * DSISR after RI is cleared, in case they had - * been clobbered by a MCE. - */ - __mtmsrd(0, 1); /* clear RI */ goto tm_return_to_guest; } } @@ -445,81 +1056,109 @@ tm_return_to_guest: /* Advance host PURR/SPURR by the amount used by guest */ purr = mfspr(SPRN_PURR); spurr = mfspr(SPRN_SPURR); - mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr + - purr - vcpu->arch.purr); - mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr + - spurr - vcpu->arch.spurr); + local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr; + local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr; vcpu->arch.purr = purr; vcpu->arch.spurr = spurr; vcpu->arch.ic = mfspr(SPRN_IC); vcpu->arch.pid = mfspr(SPRN_PID); - vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS; + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0); vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1); vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); - /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ - mtspr(SPRN_PSSCR, host_psscr | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); - mtspr(SPRN_HFSCR, host_hfscr); - mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR0, host_dawr0); - mtspr(SPRN_DAWRX0, host_dawrx0); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, host_dawr1); - mtspr(SPRN_DAWRX1, host_dawrx1); - } - - if (kvm_is_radix(kvm)) { - /* - * Since this is radix, do a eieio; tlbsync; ptesync sequence - * in case we interrupted the guest between a tlbie and a - * ptesync. - */ - asm volatile("eieio; tlbsync; ptesync"); - } + dpdes = mfspr(SPRN_DPDES); + if (dpdes) + vcpu->arch.doorbell_request = 1; - /* - * cp_abort is required if the processor supports local copy-paste - * to clear the copy buffer that was under control of the guest. 
- */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - asm volatile(PPC_CP_ABORT); - - vc->dpdes = mfspr(SPRN_DPDES); vc->vtb = mfspr(SPRN_VTB); - mtspr(SPRN_DPDES, 0); - if (vc->pcr) - mtspr(SPRN_PCR, PCR_MASK); + + dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; + *tb = mftb(); + vcpu->arch.dec_expires = dec + *tb; if (vc->tb_offset_applied) { - u64 new_tb = mftb() - vc->tb_offset_applied; + u64 new_tb = *tb - vc->tb_offset_applied; mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + *tb = new_tb; vc->tb_offset_applied = 0; } - mtspr(SPRN_HDEC, 0x7fffffff); - save_clear_guest_mmu(kvm, vcpu); switch_mmu_to_host(kvm, host_pidr); - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE; /* - * If we are in real mode, only switch MMU on after the MMU is - * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. + * Enable MSR here in order to have facilities enabled to save + * guest registers. This enables MMU (if we were in realmode), so + * only switch MMU on after the MMU is switched to host, to avoid + * the P9_RADIX_PREFETCH_BUG or hash guest context. */ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - vcpu->arch.shregs.msr & MSR_TS_MASK) + vcpu->arch.shregs.msr & MSR_TS_MASK) msr |= MSR_TS_S; - __mtmsrd(msr, 0); + store_vcpu_state(vcpu); + + mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr); + mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr); + + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ + mtspr(SPRN_PSSCR, host_hpsscr | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + } + + mtspr(SPRN_HFSCR, host_hfscr); + if (vcpu->arch.ciabr != host_ciabr) + mtspr(SPRN_CIABR, host_ciabr); + + if (dawr_enabled()) { + if (vcpu->arch.dawr0 != host_dawr0) + mtspr(SPRN_DAWR0, host_dawr0); + if (vcpu->arch.dawrx0 != host_dawrx0) + mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + if (vcpu->arch.dawr1 != host_dawr1) + mtspr(SPRN_DAWR1, host_dawr1); + if (vcpu->arch.dawrx1 != host_dawrx1) + mtspr(SPRN_DAWRX1, host_dawrx1); + } + } + + if (dpdes) + mtspr(SPRN_DPDES, 0); + if (vc->pcr) + mtspr(SPRN_PCR, PCR_MASK); + + /* HDEC must be at least as large as DEC, so decrementer_max fits */ + mtspr(SPRN_HDEC, decrementer_max); + + timer_rearm_host_dec(*tb); + + restore_p9_host_os_sprs(vcpu, &host_os_sprs); + + barrier(); /* Close in_guest critical section */ + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE); + /* Interrupts are recoverable at this point */ + + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + asm volatile(PPC_CP_ABORT); + +out: end_timing(vcpu); return trap; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d4bca93b79f6..ccfd96965630 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } + +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + long ret = 0; + + /* + * Unapply and clear the offset first. 
That way, if the TB was not + * resynced then it will remain in host-offset, and if it was resynced + * then it is brought into host-offset. Then the tb offset is + * re-applied before continuing with the KVM exit. + * + * This way, we don't need to actually know whether not OPAL resynced + * the timebase or do any of the complicated dance that the P7/8 + * path requires. + */ + if (vc->tb_offset_applied) { + u64 new_tb = mftb() - vc->tb_offset_applied; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = 0; + } + + local_paca->hmi_irqs++; + + if (hmi_handle_debugtrig(NULL) >= 0) { + ret = 1; + goto out; + } + + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(NULL); + +out: + if (vc->tb_offset) { + u64 new_tb = mftb() + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = vc->tb_offset; + } + + return ret; +} + +/* + * The following subcore HMI handling is all only for pre-POWER9 CPUs. + */ + /* Check if dynamic split is in force and return subcore size accordingly. */ static inline int kvmppc_cur_subcore_size(void) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2c1f3c6e72d1..2257fb18cb72 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm) smp_wmb(); cpumask_setall(&kvm->arch.need_tlb_flush); cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; - /* - * On POWER9, threads are independent but the TLB is shared, - * so use the bit for the first thread to represent the core. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 32a4b4d412b9..d185dee26026 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -778,17 +778,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) - li r7,-1 mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 - mtspr SPRN_AMOR,r7 - /* Restore state of CTRL run bit; assume 1 on entry */ + /* Restore state of CTRL run bit; the host currently has it set to 1 */ lwz r5,VCPU_CTRL(r4) andi. r5,r5,1 bne 4f - mfspr r6,SPRN_CTRLF - clrrdi r6,r6,1 + li r6,0 mtspr SPRN_CTRLT,r6 4: /* Secondary threads wait for primary to have done partition switch */ @@ -817,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * Set the decrementer to the guest decrementer. */ ld r8,VCPU_DEC_EXPIRES(r4) - /* r8 is a host timebase value here, convert to guest TB */ - ld r5,HSTATE_KVM_VCORE(r13) - ld r6,VCORE_TB_OFFSET_APPL(r5) - add r8,r8,r6 mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 @@ -1195,9 +1188,6 @@ guest_bypass: mftb r6 extsw r5,r5 16: add r5,r5,r6 - /* r5 is a guest timebase value here, convert to host TB */ - ld r4,VCORE_TB_OFFSET_APPL(r3) - subf r5,r4,r5 std r5,VCPU_DEC_EXPIRES(r9) /* Increment exit count, poke other threads to exit */ @@ -1211,12 +1201,12 @@ guest_bypass: stw r0, VCPU_CPU(r9) stw r0, VCPU_THREAD_CPU(r9) - /* Save guest CTRL register, set runlatch to 1 */ + /* Save guest CTRL register, set runlatch to 1 if it was clear */ mfspr r6,SPRN_CTRLF stw r6,VCPU_CTRL(r9) andi. 
r0,r6,1 bne 4f - ori r6,r6,1 + li r6,1 mtspr SPRN_CTRLT,r6 4: /* @@ -2163,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* save expiry time of guest decrementer */ add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) - ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET_APPL(r5) - subf r3, r6, r3 /* convert to host TB value */ std r3, VCPU_DEC_EXPIRES(r4) #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING @@ -2186,8 +2173,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) * Also clear the runlatch bit before napping. */ kvm_do_nap: - mfspr r0, SPRN_CTRLF - clrrdi r0, r0, 1 + li r0,0 mtspr SPRN_CTRLT, r0 li r0,1 @@ -2206,8 +2192,7 @@ kvm_nap_sequence: /* desired LPCR value in r5 */ bl isa206_idle_insn_mayloss - mfspr r0, SPRN_CTRLF - ori r0, r0, 1 + li r0,1 mtspr SPRN_CTRLT, r0 mtspr SPRN_SRR1, r3 @@ -2264,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Restore guest decrementer */ ld r3, VCPU_DEC_EXPIRES(r4) - ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET_APPL(r5) - add r3, r3, r6 /* convert host TB to guest TB value */ mftb r7 subf r3, r7, r3 mtspr SPRN_DEC, r3 @@ -2711,8 +2693,7 @@ kvmppc_bad_host_intr: std r0, GPR0(r1) std r9, GPR1(r1) std r2, GPR2(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) srdi r0, r12, 32 clrldi r12, r12, 32 std r0, _CCR(r1) @@ -2735,7 +2716,7 @@ kvmppc_bad_host_intr: ld r9, HSTATE_SCRATCH2(r13) ld r12, HSTATE_SCRATCH0(r13) GET_SCRATCH0(r0) - SAVE_4GPRS(9, r1) + SAVE_GPRS(9, 12, r1) std r0, GPR13(r1) SAVE_NVGPRS(r1) ld r5, HSTATE_CFAR(r13) @@ -2778,10 +2759,11 @@ kvmppc_msr_interrupt: blr /* + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) + * * Load up guest PMU state. R3 points to the vcpu struct. */ -_GLOBAL(kvmhv_load_guest_pmu) -EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu) +kvmhv_load_guest_pmu: mr r4, r3 mflr r0 li r3, 1 @@ -2816,26 +2798,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 BEGIN_FTR_SECTION - ld r5, VCPU_MMCR + 24(r4) - ld r6, VCPU_SIER + 8(r4) - ld r7, VCPU_SIER + 16(r4) - mtspr SPRN_MMCR3, r5 - mtspr SPRN_SIER2, r6 - mtspr SPRN_SIER3, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) -BEGIN_FTR_SECTION ld r5, VCPU_MMCR + 16(r4) ld r6, VCPU_SIER(r4) mtspr SPRN_MMCR2, r5 mtspr SPRN_SIER, r6 -BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) ld r9, VCPU_MMCRS(r4) mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -2843,10 +2815,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) blr /* + * void kvmhv_load_host_pmu(void) + * * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. */ -_GLOBAL(kvmhv_load_host_pmu) -EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu) +kvmhv_load_host_pmu: mflr r0 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 @@ -2884,25 +2857,18 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - ld r5, HSTATE_MMCR3(r13) - ld r6, HSTATE_SIER2(r13) - ld r7, HSTATE_SIER3(r13) - mtspr SPRN_MMCR3, r5 - mtspr SPRN_SIER2, r6 - mtspr SPRN_SIER3, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mtspr SPRN_MMCR0, r3 isync mtlr r0 23: blr /* + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) + * * Save guest PMU state into the vcpu struct. 
* r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) */ -_GLOBAL(kvmhv_save_guest_pmu) -EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu) +kvmhv_save_guest_pmu: mr r9, r3 mr r8, r4 BEGIN_FTR_SECTION @@ -2951,14 +2917,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION std r10, VCPU_MMCR + 16(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - mfspr r5, SPRN_MMCR3 - mfspr r6, SPRN_SIER2 - mfspr r7, SPRN_SIER3 - std r5, VCPU_MMCR + 24(r9) - std r6, VCPU_SIER + 8(r9) - std r7, VCPU_SIER + 16(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) std r7, VCPU_SIAR(r9) std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 @@ -2976,7 +2934,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) BEGIN_FTR_SECTION mfspr r5, SPRN_SIER std r5, VCPU_SIER(r9) -BEGIN_FTR_SECTION_NESTED(96) mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 mfspr r8, SPRN_MMCRS @@ -2985,7 +2942,6 @@ BEGIN_FTR_SECTION_NESTED(96) std r8, VCPU_MMCRS(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: blr diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9e5d0f413b71..5d1881d2e39a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -19,7 +19,12 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o + +obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c5ed98823835..906d43463366 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -3,22 +3,18 @@ * Copyright 2008 Michael Ellerman, IBM Corporation. 
*/ -#include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/mm.h> #include <linux/cpuhotplug.h> -#include <linux/slab.h> #include <linux/uaccess.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> -#include <asm/setup.h> #include <asm/inst.h> -static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) +static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) { if (!ppc_inst_prefixed(instr)) { u32 val = ppc_inst_val(instr); @@ -39,7 +35,7 @@ failed: return -EFAULT; } -int raw_patch_instruction(u32 *addr, struct ppc_inst instr) +int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { return __patch_instruction(addr, instr, addr); } @@ -86,23 +82,16 @@ void __init poking_init(void) static int map_patch_area(void *addr, unsigned long text_poke_addr) { unsigned long pfn; - int err; if (is_vmalloc_or_module_addr(addr)) pfn = vmalloc_to_pfn(addr); else pfn = __pa_symbol(addr) >> PAGE_SHIFT; - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); - - pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); - if (err) - return -1; - - return 0; + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); } -static inline int unmap_patch_area(unsigned long addr) +static void unmap_patch_area(unsigned long addr) { pte_t *ptep; pmd_t *pmdp; @@ -111,43 +100,56 @@ static inline int unmap_patch_area(unsigned long addr) pgd_t *pgdp; pgdp = pgd_offset_k(addr); - if (unlikely(!pgdp)) - return -EINVAL; + if (WARN_ON(pgd_none(*pgdp))) + return; p4dp = p4d_offset(pgdp, addr); - if (unlikely(!p4dp)) - return -EINVAL; + if (WARN_ON(p4d_none(*p4dp))) + return; pudp = pud_offset(p4dp, addr); - if (unlikely(!pudp)) - return -EINVAL; + if (WARN_ON(pud_none(*pudp))) + return; pmdp = pmd_offset(pudp, addr); - if (unlikely(!pmdp)) - return -EINVAL; + if (WARN_ON(pmd_none(*pmdp))) + return; ptep = pte_offset_kernel(pmdp, addr); - if (unlikely(!ptep)) - return -EINVAL; - - pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); + if (WARN_ON(pte_none(*ptep))) + return; /* * In hash, pte_clear flushes the tlb, in radix, we have to */ pte_clear(&init_mm, addr, ptep); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} - return 0; +static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + + text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + err = map_patch_area(addr, text_poke_addr); + if (err) + return err; + + err = __patch_instruction(addr, instr, patch_addr); + + unmap_patch_area(text_poke_addr); + + return err; } -static int do_patch_instruction(u32 *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; - u32 *patch_addr = NULL; unsigned long flags; - unsigned long text_poke_addr; - unsigned long kaddr = (unsigned long)addr; /* * During early early boot patch_instruction is called @@ -158,51 +160,37 @@ static int do_patch_instruction(u32 *addr, struct ppc_inst instr) return raw_patch_instruction(addr, instr); local_irq_save(flags); - - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; - if (map_patch_area(addr, text_poke_addr)) { - err = -1; - goto out; - } - - patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); - - __patch_instruction(addr, instr, patch_addr); - - err = 
unmap_patch_area(text_poke_addr); - if (err) - pr_warn("failed to unmap %lx\n", text_poke_addr); - -out: + err = __do_patch_instruction(addr, instr); local_irq_restore(flags); return err; } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(u32 *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, ppc_inst_t instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(u32 *addr, struct ppc_inst instr) +int patch_instruction(u32 *addr, ppc_inst_t instr) { /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", addr); + if (system_state >= SYSTEM_FREEING_INITMEM && init_section_contains(addr, 4)) return 0; - } + return do_patch_instruction(addr, instr); } NOKPROBE_SYMBOL(patch_instruction); int patch_branch(u32 *addr, unsigned long target, int flags) { - struct ppc_inst instr; + ppc_inst_t instr; + + if (create_branch(&instr, addr, target, flags)) + return -ERANGE; - create_branch(&instr, addr, target, flags); return patch_instruction(addr, instr); } @@ -237,7 +225,7 @@ bool is_offset_in_cond_branch_range(long offset) * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(struct ppc_inst instr) +bool is_conditional_branch(ppc_inst_t instr) { unsigned int opcode = ppc_inst_primary_opcode(instr); @@ -255,7 +243,7 @@ bool is_conditional_branch(struct ppc_inst instr) } NOKPROBE_SYMBOL(is_conditional_branch); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -275,7 +263,7 @@ int create_branch(struct ppc_inst *instr, const u32 *addr, return 0; } -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -294,22 +282,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr, return 0; } -static unsigned int branch_opcode(struct ppc_inst instr) -{ - return ppc_inst_primary_opcode(instr) & 0x3F; -} - -static int instr_is_branch_iform(struct ppc_inst instr) -{ - return branch_opcode(instr) == 18; -} - -static int instr_is_branch_bform(struct ppc_inst instr) -{ - return branch_opcode(instr) == 16; -} - -int instr_is_relative_branch(struct ppc_inst instr) +int instr_is_relative_branch(ppc_inst_t instr) { if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; @@ -317,7 +290,7 @@ int instr_is_relative_branch(struct ppc_inst instr) return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(struct ppc_inst instr) +int instr_is_relative_link_branch(ppc_inst_t instr) { return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } @@ -364,7 +337,7 @@ unsigned long branch_target(const u32 *instr) return 0; } -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) { unsigned long target; target = branch_target(src); @@ -378,375 +351,3 @@ int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) return 1; } - -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr) -{ - extern unsigned int interrupt_base_book3e; - unsigned int *ibase = &interrupt_base_book3e; - - 
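(Aside on the patch_branch() change above, which now returns -ERANGE when create_branch() cannot encode the target: an I-form branch carries a 26-bit signed, word-aligned displacement, so relative targets beyond roughly +/- 32 MB cannot be encoded. A simplified stand-alone version of that range check, not the kernel's create_branch():)

	#include <stdio.h>

	static int branch_offset_ok(long offset)
	{
		return offset >= -0x2000000 && offset <= 0x1fffffc &&
		       (offset & 0x3) == 0;
	}

	int main(void)
	{
		printf("-32MB      : %s\n", branch_offset_ok(-0x2000000) ? "ok" : "out of range");
		printf("+32MB - 4  : %s\n", branch_offset_ok(0x1fffffc) ? "ok" : "out of range");
		printf("+32MB      : %s\n", branch_offset_ok(0x2000000) ? "ok" : "out of range");
		printf("unaligned  : %s\n", branch_offset_ok(0x12346) ? "ok" : "out of range");
		return 0;
	}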
/* Our exceptions vectors start with a NOP and -then- a branch - * to deal with single stepping from userspace which stops on - * the second instruction. Thus we need to patch the second - * instruction of the exception, not the first one - */ - - patch_branch(ibase + (exc / 4) + 1, addr, 0); -} -#endif - -#ifdef CONFIG_CODE_PATCHING_SELFTEST - -static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) -{ - if (instr_is_branch_iform(ppc_inst_read(instr)) || - instr_is_branch_bform(ppc_inst_read(instr))) - return branch_target(instr) == addr; - - return 0; -} - -static void __init test_trampoline(void) -{ - asm ("nop;\n"); -} - -#define check(x) \ - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); - -static void __init test_branch_iform(void) -{ - int err; - struct ppc_inst instr; - u32 tmp[2]; - u32 *iptr = tmp; - unsigned long addr = (unsigned long)tmp; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(ppc_inst(0x48000000))); - /* All bits of target set, and flags */ - check(instr_is_branch_iform(ppc_inst(0x4bffffff))); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); - - /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(ppc_inst(0x48000001))); - /* All bits of targets set */ - check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); - /* Some bits of targets set */ - check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); - /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); - - /* Absolute branch to 0x100 */ - patch_instruction(iptr, ppc_inst(0x48000103)); - check(instr_is_branch_to_addr(iptr, 0x100)); - /* Absolute branch to 0x420fc */ - patch_instruction(iptr, ppc_inst(0x480420ff)); - check(instr_is_branch_to_addr(iptr, 0x420fc)); - /* Maximum positive relative branch, + 20MB - 4B */ - patch_instruction(iptr, ppc_inst(0x49fffffc)); - check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); - /* Smallest negative relative branch, - 4B */ - patch_instruction(iptr, ppc_inst(0x4bfffffc)); - check(instr_is_branch_to_addr(iptr, addr - 4)); - /* Largest negative relative branch, - 32 MB */ - patch_instruction(iptr, ppc_inst(0x4a000000)); - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); - - /* Branch to self, with link */ - err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - - /* Branch to self - 0x100, with link */ - err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x100)); - - /* Branch to self + 0x100, no link */ - err = create_branch(&instr, iptr, addr + 0x100, 0); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 MB */ - err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); - - /* Out of range relative negative offset, - 32 MB + 4*/ - err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); - check(err); - - /* Out of range relative positive offset, + 32 MB */ - err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); - check(err); - - /* Unaligned target */ - err = create_branch(&instr, iptr, addr 
+ 3, BRANCH_SET_LINK); - check(err); - - /* Check flags are masked correctly */ - err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - check(ppc_inst_equal(instr, ppc_inst(0x48000000))); -} - -static void __init test_create_function_call(void) -{ - u32 *iptr; - unsigned long dest; - struct ppc_inst instr; - - /* Check we can create a function call */ - iptr = (u32 *)ppc_function_entry(test_trampoline); - dest = ppc_function_entry(test_create_function_call); - create_branch(&instr, iptr, dest, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, dest)); -} - -static void __init test_branch_bform(void) -{ - int err; - unsigned long addr; - struct ppc_inst instr; - u32 tmp[2]; - u32 *iptr = tmp; - unsigned int flags; - - addr = (unsigned long)iptr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_bform(ppc_inst(0x40000000))); - /* All bits of target set, and flags */ - check(instr_is_branch_bform(ppc_inst(0x43ffffff))); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); - - /* Absolute conditional branch to 0x100 */ - patch_instruction(iptr, ppc_inst(0x43ff0103)); - check(instr_is_branch_to_addr(iptr, 0x100)); - /* Absolute conditional branch to 0x20fc */ - patch_instruction(iptr, ppc_inst(0x43ff20ff)); - check(instr_is_branch_to_addr(iptr, 0x20fc)); - /* Maximum positive relative conditional branch, + 32 KB - 4B */ - patch_instruction(iptr, ppc_inst(0x43ff7ffc)); - check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); - /* Smallest negative relative conditional branch, - 4B */ - patch_instruction(iptr, ppc_inst(0x43fffffc)); - check(instr_is_branch_to_addr(iptr, addr - 4)); - /* Largest negative relative conditional branch, - 32 KB */ - patch_instruction(iptr, ppc_inst(0x43ff8000)); - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); - - /* All condition code bits set & link */ - flags = 0x3ff000 | BRANCH_SET_LINK; - - /* Branch to self */ - err = create_cond_branch(&instr, iptr, addr, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - - /* Branch to self - 0x100 */ - err = create_cond_branch(&instr, iptr, addr - 0x100, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x100)); - - /* Branch to self + 0x100 */ - err = create_cond_branch(&instr, iptr, addr + 0x100, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 KB */ - err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); - - /* Out of range relative negative offset, - 32 KB + 4*/ - err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); - check(err); - - /* Out of range relative positive offset, + 32 KB */ - err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); - check(err); - - /* Unaligned target */ - err = create_cond_branch(&instr, iptr, addr + 3, flags); - check(err); - - /* Check flags are masked correctly */ - err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); -} - -static void __init test_translate_branch(void) -{ - 
unsigned long addr; - void *p, *q; - struct ppc_inst instr; - void *buf; - - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); - check(buf); - if (!buf) - return; - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - check(instr_is_branch_to_addr(p, addr)); - q = p + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 MB */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 0x2000000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); - - /* Maximum positive case, move x to x - 32 MB + 4 */ - p = buf + 0x2000000; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); - - /* Jump to x + 16 MB moved to x + 20 MB */ - p = buf; - addr = 0x1000000 + (unsigned long)buf; - patch_branch(p, addr, BRANCH_SET_LINK); - q = buf + 0x1400000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ - p = buf + 0x1000000; - addr = 0x2000000 + (unsigned long)buf; - patch_branch(p, addr, 0); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - - /* Conditional branch tests */ - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0); - patch_instruction(p, instr); - check(instr_is_branch_to_addr(p, addr)); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . 
to addr + 32 KB */ - p = buf; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); - patch_instruction(p, instr); - q = buf + 0x8000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); - - /* Maximum positive case, move x to x - 32 KB + 4 */ - p = buf + 0x8000; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); - patch_instruction(p, instr); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); - - /* Jump to x + 12 KB moved to x + 20 KB */ - p = buf; - addr = 0x3000 + (unsigned long)buf; - create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); - patch_instruction(p, instr); - q = buf + 0x5000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ - p = buf + 0x2000; - addr = 0x4000 + (unsigned long)buf; - create_cond_branch(&instr, p, addr, 0); - patch_instruction(p, instr); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Free the buffer we were using */ - vfree(buf); -} - -#ifdef CONFIG_PPC64 -static void __init test_prefixed_patching(void) -{ - extern unsigned int code_patching_test1[]; - extern unsigned int code_patching_test1_expected[]; - extern unsigned int end_code_patching_test1[]; - - __patch_instruction(code_patching_test1, - ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), - code_patching_test1); - - check(!memcmp(code_patching_test1, - code_patching_test1_expected, - sizeof(unsigned int) * - (end_code_patching_test1 - code_patching_test1))); -} -#else -static inline void test_prefixed_patching(void) {} -#endif - -static int __init test_code_patching(void) -{ - printk(KERN_DEBUG "Running code patching self-tests ...\n"); - - test_branch_iform(); - test_branch_bform(); - test_create_function_call(); - test_translate_branch(); - test_prefixed_patching(); - - return 0; -} -late_initcall(test_code_patching); - -#endif /* CONFIG_CODE_PATCHING_SELFTEST */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index c3e06922468b..343a78826035 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -47,7 +47,7 @@ static u32 *calc_addr(struct fixup_entry *fcur, long offset) static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { int err; - struct ppc_inst instr; + ppc_inst_t instr; instr = ppc_inst_read(src); @@ -580,7 +580,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } -static void patch_btb_flush_section(long *curr) +static void __init patch_btb_flush_section(long *curr) { unsigned int *start, *end; @@ -592,7 +592,7 @@ static void patch_btb_flush_section(long *curr) } } -void do_btb_flush_fixups(void) +void __init do_btb_flush_fixups(void) { long *start, *end; @@ -621,10 +621,10 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } -static void do_final_fixups(void) +static void __init do_final_fixups(void) { 
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - struct ppc_inst inst; + ppc_inst_t inst; u32 *src, *dest, *end; if (PHYSICAL_START == 0) @@ -715,12 +715,12 @@ late_initcall(check_features); /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; -static long calc_offset(struct fixup_entry *entry, unsigned int *p) +static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) { return (unsigned long)p - (unsigned long)entry; } -static void test_basic_patching(void) +static void __init test_basic_patching(void) { extern unsigned int ftr_fixup_test1[]; extern unsigned int end_ftr_fixup_test1[]; @@ -751,7 +751,7 @@ static void test_basic_patching(void) check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } -static void test_alternative_patching(void) +static void __init test_alternative_patching(void) { extern unsigned int ftr_fixup_test2[]; extern unsigned int end_ftr_fixup_test2[]; @@ -784,7 +784,7 @@ static void test_alternative_patching(void) check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); } -static void test_alternative_case_too_big(void) +static void __init test_alternative_case_too_big(void) { extern unsigned int ftr_fixup_test3[]; extern unsigned int end_ftr_fixup_test3[]; @@ -810,7 +810,7 @@ static void test_alternative_case_too_big(void) check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } -static void test_alternative_case_too_small(void) +static void __init test_alternative_case_too_small(void) { extern unsigned int ftr_fixup_test4[]; extern unsigned int end_ftr_fixup_test4[]; @@ -856,7 +856,7 @@ static void test_alternative_case_with_branch(void) check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); } -static void test_alternative_case_with_external_branch(void) +static void __init test_alternative_case_with_external_branch(void) { extern unsigned int ftr_fixup_test6[]; extern unsigned int end_ftr_fixup_test6[]; @@ -866,7 +866,7 @@ static void test_alternative_case_with_external_branch(void) check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } -static void test_alternative_case_with_branch_to_end(void) +static void __init test_alternative_case_with_branch_to_end(void) { extern unsigned int ftr_fixup_test7[]; extern unsigned int end_ftr_fixup_test7[]; @@ -876,7 +876,7 @@ static void test_alternative_case_with_branch_to_end(void) check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); } -static void test_cpu_macros(void) +static void __init test_cpu_macros(void) { extern u8 ftr_fixup_test_FTR_macros[]; extern u8 ftr_fixup_test_FTR_macros_expected[]; @@ -888,7 +888,7 @@ static void test_cpu_macros(void) ftr_fixup_test_FTR_macros_expected, size) == 0); } -static void test_fw_macros(void) +static void __init test_fw_macros(void) { #ifdef CONFIG_PPC64 extern u8 ftr_fixup_test_FW_FTR_macros[]; @@ -902,7 +902,7 @@ static void test_fw_macros(void) #endif } -static void test_lwsync_macros(void) +static void __init test_lwsync_macros(void) { extern u8 lwsync_fixup_test[]; extern u8 end_lwsync_fixup_test[]; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 86f49e3e7cf5..a94b0cd0bdc5 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1354,7 +1354,7 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. 
*/ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - struct ppc_inst instr) + ppc_inst_t instr) { #ifdef CONFIG_PPC64 unsigned int suffixopcode, prefixtype, prefix_r; @@ -3578,7 +3578,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. */ -int emulate_step(struct pt_regs *regs, struct ppc_inst instr) +int emulate_step(struct pt_regs *regs, ppc_inst_t instr) { struct instruction_op op; int r, err, type; diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c new file mode 100644 index 000000000000..c44823292f73 --- /dev/null +++ b/arch/powerpc/lib/test-code-patching.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Michael Ellerman, IBM Corporation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> + +#include <asm/code-patching.h> + +static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + +static void __init test_trampoline(void) +{ + asm ("nop;nop;\n"); +} + +#define check(x) do { \ + if (!(x)) \ + pr_err("code-patching: test failed at line %d\n", __LINE__); \ +} while (0) + +static void __init test_branch_iform(void) +{ + int err; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_iform(ppc_inst(0x48000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); + + /* Simplest case, branch to self with link */ + check(instr_is_branch_iform(ppc_inst(0x48000001))); + /* All bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); + /* Some bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); + /* Must be a valid branch to start with */ + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); + + /* Absolute branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute branch to 0x420fc */ + ppc_inst_write(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); + /* Maximum positive relative branch, + 20MB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); + /* Smallest negative relative branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative branch, - 32 MB */ + ppc_inst_write(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Branch to self, with link */ + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100, with link */ + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100, no link */ + err = create_branch(&instr, iptr, addr + 0x100, 0); + ppc_inst_write(iptr, instr); + 
check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 MB */ + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Out of range relative negative offset, - 32 MB + 4*/ + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); + + /* Out of range relative positive offset, + 32 MB */ + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); + + /* Unaligned target */ + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); + check(err); + + /* Check flags are masked correctly */ + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); +} + +static void __init test_create_function_call(void) +{ + u32 *iptr; + unsigned long dest; + ppc_inst_t instr; + + /* Check we can create a function call */ + iptr = (u32 *)ppc_function_entry(test_trampoline); + dest = ppc_function_entry(test_create_function_call); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, dest)); +} + +static void __init test_branch_bform(void) +{ + int err; + unsigned long addr; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned int flags; + + addr = (unsigned long)iptr; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_bform(ppc_inst(0x40000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); + + /* Absolute conditional branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute conditional branch to 0x20fc */ + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); + /* Maximum positive relative conditional branch, + 32 KB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); + /* Smallest negative relative conditional branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative conditional branch, - 32 KB */ + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* All condition code bits set & link */ + flags = 0x3ff000 | BRANCH_SET_LINK; + + /* Branch to self */ + err = create_cond_branch(&instr, iptr, addr, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100 */ + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100 */ + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 KB */ + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* Out of range relative negative offset, - 32 KB + 4*/ + err = 
create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); + + /* Out of range relative positive offset, + 32 KB */ + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); + + /* Unaligned target */ + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); + + /* Check flags are masked correctly */ + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); +} + +static void __init test_translate_branch(void) +{ + unsigned long addr; + void *p, *q; + ppc_inst_t instr; + void *buf; + + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); + check(buf); + if (!buf) + return; + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = p + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 MB */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 0x2000000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); + + /* Maximum positive case, move x to x - 32 MB + 4 */ + p = buf + 0x2000000; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); + + /* Jump to x + 16 MB moved to x + 20 MB */ + p = buf; + addr = 0x1000000 + (unsigned long)buf; + create_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x1400000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ + p = buf + 0x1000000; + addr = 0x2000000 + (unsigned long)buf; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + + /* Conditional branch tests */ + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . 
to addr + 32 KB */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 0x8000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); + + /* Maximum positive case, move x to x - 32 KB + 4 */ + p = buf + 0x8000; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); + + /* Jump to x + 12 KB moved to x + 20 KB */ + p = buf; + addr = 0x3000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x5000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ + p = buf + 0x2000; + addr = 0x4000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Free the buffer we were using */ + vfree(buf); +} + +static void __init test_prefixed_patching(void) +{ + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); + u32 expected[2] = {OP_PREFIX << 26, 0}; + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); + + if (!IS_ENABLED(CONFIG_PPC64)) + return; + + patch_instruction(iptr, inst); + + check(!memcmp(iptr, expected, sizeof(expected))); +} + +static int __init test_code_patching(void) +{ + pr_info("Running code patching self-tests ...\n"); + + test_branch_iform(); + test_branch_bform(); + test_create_function_call(); + test_translate_branch(); + test_prefixed_patching(); + + return 0; +} +late_initcall(test_code_patching); diff --git a/arch/powerpc/lib/test_code-patching.S b/arch/powerpc/lib/test_code-patching.S deleted file mode 100644 index a9be6107844e..000000000000 --- a/arch/powerpc/lib/test_code-patching.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020 IBM Corporation - */ -#include <asm/ppc-opcode.h> - - .text - -#define globl(x) \ - .globl x; \ -x: - -globl(code_patching_test1) - nop - nop -globl(end_code_patching_test1) - -globl(code_patching_test1_expected) - .long OP_PREFIX << 26 - .long 0x0000000 diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 8b4f6b3e96c4..4f141daafcff 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -792,7 +792,7 @@ static void __init test_lxvpx_stxvpx(void) #ifdef CONFIG_VSX static void __init test_plxvp_pstxvp(void) { - struct ppc_inst instr; + ppc_inst_t instr; struct pt_regs regs; union { vector128 a; @@ -906,7 +906,7 @@ struct compute_test { struct { char *descr; unsigned long flags; - struct ppc_inst instr; + ppc_inst_t instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; @@ -1600,7 +1600,7 @@ static struct compute_test compute_tests[] = { }; static int __init emulate_compute_instr(struct pt_regs *regs, - struct ppc_inst instr, + ppc_inst_t instr, bool negative) { int analysed; @@ -1627,7 +1627,7 @@ 
static int __init emulate_compute_instr(struct pt_regs *regs, } static int __init execute_compute_instr(struct pt_regs *regs, - struct ppc_inst instr) + ppc_inst_t instr) { extern int exec_instr(struct pt_regs *regs); @@ -1658,7 +1658,7 @@ static void __init run_tests_compute(void) struct compute_test *test; struct pt_regs *regs, exp, got; unsigned int i, j, k; - struct ppc_inst instr; + ppc_inst_t instr; bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 9ef941d958d8..5473f9d03df3 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -37,7 +37,7 @@ _GLOBAL(exec_instr) * The stack pointer (GPR1) and the thread pointer (GPR13) are not * saved as these should not be modified anyway. */ - SAVE_2GPRS(2, r1) + SAVE_GPRS(2, 3, r1) SAVE_NVGPRS(r1) /* @@ -75,8 +75,7 @@ _GLOBAL(exec_instr) /* Load GPRs from pt_regs */ REST_GPR(0, r31) - REST_10GPRS(2, r31) - REST_GPR(12, r31) + REST_GPRS(2, 12, r31) REST_NVGPRS(r31) /* Placeholder for the test instruction */ @@ -99,8 +98,7 @@ _GLOBAL(exec_instr) subi r3, r3, GPR0 SAVE_GPR(0, r3) SAVE_GPR(2, r3) - SAVE_8GPRS(4, r3) - SAVE_GPR(12, r3) + SAVE_GPRS(4, 12, r3) SAVE_NVGPRS(r3) /* Save resulting LR to pt_regs */ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 15f4773643d2..50dd8f6bdf46 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,5 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o -obj-$(CONFIG_PPC_KUEP) += kuep.o obj-$(CONFIG_PPC_KUAP) += kuap.o diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 0f920f09af57..28676cabb005 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -20,8 +20,11 @@ EXPORT_SYMBOL(kuap_unlock_all_ool); void setup_kuap(bool disabled) { - if (!disabled) + if (!disabled) { kuap_lock_all_ool(); + init_mm.context.sr0 |= SR_KS; + current->thread.sr0 |= SR_KS; + } if (smp_processor_id() != boot_cpuid) return; diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c deleted file mode 100644 index c20733d6e02c..000000000000 --- a/arch/powerpc/mm/book3s32/kuep.c +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include <asm/kup.h> -#include <asm/smp.h> - -struct static_key_false disable_kuep_key; - -void setup_kuep(bool disabled) -{ - if (!disabled) - kuep_lock(); - - if (smp_processor_id() != boot_cpuid) - return; - - if (disabled) - static_branch_enable(&disable_kuep_key); - else - pr_info("Activating Kernel Userspace Execution Prevention\n"); -} diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 27061583a010..94045b265b6b 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int find_free_bat(void) +static int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -196,18 +196,17 @@ void mmu_mark_initmem_nx(void) int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 
8 : 4; int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; - unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K); unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { + for (i = 0; i < nb - 1 && base < top;) { size = block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { size = block_size(base, top); - size = max(size, 128UL << 10); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index e2708e387dc3..269a3eb25a73 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -69,6 +69,12 @@ EXPORT_SYMBOL_GPL(__init_new_context); int init_new_context(struct task_struct *t, struct mm_struct *mm) { mm->context.id = __init_new_context(); + mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0); + + if (!kuep_is_disabled()) + mm->context.sr0 |= SR_NX; + if (!kuap_is_disabled()) + mm->context.sr0 |= SR_KS; return 0; } @@ -108,20 +114,13 @@ void __init mmu_context_init(void) void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { long id = next->context.id; - unsigned long val; if (id < 0) panic("mm_struct %p has no context ID", next); isync(); - val = CTX_TO_VSID(id, 0); - if (!kuep_is_disabled()) - val |= SR_NX; - if (!kuap_is_disabled()) - val |= SR_KS; - - update_user_segments(val); + update_user_segments(next->context.sr0); if (IS_ENABLED(CONFIG_BDI_SWITCH)) abatron_pteptrs[1] = next->pgd; diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index 1b56d3af47d4..2d50cac499c5 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -2,20 +2,23 @@ ccflags-y := $(NO_MINIMAL_TOC) +obj-y += mmu_context.o pgtable.o trace.o +ifdef CONFIG_PPC_64S_HASH_MMU CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) - -obj-y += hash_pgtable.o hash_utils.o slb.o \ - mmu_context.o pgtable.o hash_tlb.o -obj-$(CONFIG_PPC_NATIVE) += hash_native.o -obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o +obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o +obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o -obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o +obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o +endif + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o + +obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o endif -obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o -obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o obj-$(CONFIG_PPC_PKEY) += pkeys.o diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index d8279bfe68ea..623a7b7ab38b 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -43,110 +43,6 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) -{ - unsigned long rb; - - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); - - asm volatile("tlbiel %0" : : 
"r" (rb)); -} - -/* - * tlbiel instruction for hash, set invalidation - * i.e., r=1 and is=01 or is=10 or is=11 - */ -static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, - unsigned int pid, - unsigned int ric, unsigned int prs) -{ - unsigned long rb; - unsigned long rs; - unsigned int r = 0; /* hash format */ - - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); - rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) - : "memory"); -} - - -static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) -{ - unsigned int set; - - asm volatile("ptesync": : :"memory"); - - for (set = 0; set < num_sets; set++) - tlbiel_hash_set_isa206(set, is); - - ppc_after_tlbiel_barrier(); -} - -static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) -{ - unsigned int set; - - asm volatile("ptesync": : :"memory"); - - /* - * Flush the partition table cache if this is HV mode. - */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - - /* - * Now invalidate the process table cache. UPRT=0 HPT modes (what - * current hardware implements) do not use the process table, but - * add the flushes anyway. - * - * From ISA v3.0B p. 1078: - * The following forms are invalid. - * * PRS=1, R=0, and RIC!=2 (The only process-scoped - * HPT caching is of the Process Table.) - */ - tlbiel_hash_set_isa300(0, is, 0, 2, 1); - - /* - * Then flush the sets of the TLB proper. Hash mode uses - * partition scoped TLB translations, which may be flushed - * in !HV mode. - */ - for (set = 0; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); - - ppc_after_tlbiel_barrier(); - - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); -} - -void hash__tlbiel_all(unsigned int action) -{ - unsigned int is; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - is = 3; - break; - case TLB_INVAL_SCOPE_LPID: - is = 2; - break; - default: - BUG(); - } - - if (early_cpu_has_feature(CPU_FTR_ARCH_300)) - tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); - else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) - tlbiel_all_isa206(POWER8_TLB_SETS, is); - else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) - tlbiel_all_isa206(POWER7_TLB_SETS, is); - else - WARN(1, "%s called on pre-POWER7 CPU\n", __func__); -} - static inline unsigned long ___tlbie(unsigned long vpn, int psize, int apsize, int ssize) { @@ -267,7 +163,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) va |= ssize << 8; sllp = get_sllp_encoding(apsize); va |= sllp << 5; - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1) + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1) : : "r" (va), "i" (CPU_FTR_ARCH_206) : "memory"); break; @@ -286,7 +182,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) */ va |= (vpn & 0xfe); va |= 1; /* L */ - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1) + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1) : : "r" (va), "i" (CPU_FTR_ARCH_206) : "memory"); break; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index ad5eff097d31..7ce8914992e3 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -16,7 +16,6 @@ #include <mm/mmu_decl.h> -#define CREATE_TRACE_POINTS #include <trace/events/thp.h> #if H_PGTABLE_RANGE > (USER_VSID_RANGE * 
(TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cfd45245d009..7abf82a698d3 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -99,8 +99,6 @@ */ static unsigned long _SDR1; -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; -EXPORT_SYMBOL_GPL(mmu_psize_defs); u8 hpte_page_sizes[1 << LP_BITS]; EXPORT_SYMBOL_GPL(hpte_page_sizes); @@ -114,9 +112,6 @@ EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -int mmu_vmemmap_psize = MMU_PAGE_4K; -#endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; EXPORT_SYMBOL_GPL(mmu_kernel_ssize); @@ -175,6 +170,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { }, }; +static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) +{ + unsigned long rb; + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + + asm volatile("tlbiel %0" : : "r" (rb)); +} + +/* + * tlbiel instruction for hash, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 0; /* hash format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) + : "memory"); +} + + +static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa206(set, is); + + ppc_after_tlbiel_barrier(); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the partition table cache if this is HV mode. + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); + + /* + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. + * + * From ISA v3.0B p. 1078: + * The following forms are invalid. + * * PRS=1, R=0, and RIC!=2 (The only process-scoped + * HPT caching is of the Process Table.) + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 1); + + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. 
+ */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + + ppc_after_tlbiel_barrier(); + + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); +} + +void hash__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + tlbiel_all_isa206(POWER8_TLB_SETS, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) + tlbiel_all_isa206(POWER7_TLB_SETS, is); + else + WARN(1, "%s called on pre-POWER7 CPU\n", __func__); +} + /* * 'R' and 'C' update notes: * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* @@ -563,7 +662,7 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, } #endif /* CONFIG_HUGETLB_PAGE */ -static void mmu_psize_set_default_penc(void) +static void __init mmu_psize_set_default_penc(void) { int bpsize, apsize; for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) @@ -573,7 +672,7 @@ static void mmu_psize_set_default_penc(void) #ifdef CONFIG_PPC_64K_PAGES -static bool might_have_hea(void) +static bool __init might_have_hea(void) { /* * The HEA ethernet adapter requires awareness of the @@ -644,7 +743,7 @@ static void __init htab_scan_page_sizes(void) * low-order N bits as the encoding for the 2^(12+N) byte page size * (if it exists). */ -static void init_hpte_page_sizes(void) +static void __init init_hpte_page_sizes(void) { long int ap, bp; long int shift, penc; @@ -1091,7 +1190,7 @@ void __init hash__early_init_mmu(void) ps3_early_mm_init(); else if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_pseries(); - else if (IS_ENABLED(CONFIG_PPC_NATIVE)) + else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE)) hpte_init_native(); if (!mmu_hash_ops.hpte_insert) diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index a688e1324ae5..ea8f83afb0ae 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -16,6 +16,7 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); +#ifdef CONFIG_PPC_64S_HASH_MMU int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) @@ -122,6 +123,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } +#endif pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) @@ -148,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void hugetlbpage_init_default(void) +void __init hugetlbpage_init_default(void) { /* Set default large page size. 
Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index c10fc8a72fb3..c766e4c26e42 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -31,7 +31,8 @@ static int alloc_context_id(int min_id, int max_id) return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL); } -void hash__reserve_context_id(int id) +#ifdef CONFIG_PPC_64S_HASH_MMU +void __init hash__reserve_context_id(int id) { int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL); @@ -50,7 +51,9 @@ int hash__alloc_context_id(void) return alloc_context_id(MIN_USER_CONTEXT, max); } EXPORT_SYMBOL_GPL(hash__alloc_context_id); +#endif +#ifdef CONFIG_PPC_64S_HASH_MMU static int realloc_context_ids(mm_context_t *ctx) { int i, id; @@ -150,6 +153,13 @@ void hash__setup_new_exec(void) slb_setup_new_exec(); } +#else +static inline int hash__init_new_context(struct mm_struct *mm) +{ + BUILD_BUG(); + return 0; +} +#endif static int radix__init_new_context(struct mm_struct *mm) { @@ -175,7 +185,9 @@ static int radix__init_new_context(struct mm_struct *mm) */ asm volatile("ptesync;isync" : : : "memory"); +#ifdef CONFIG_PPC_64S_HASH_MMU mm->context.hash_context = NULL; +#endif return index; } @@ -213,14 +225,22 @@ EXPORT_SYMBOL_GPL(__destroy_context); static void destroy_contexts(mm_context_t *ctx) { - int index, context_id; + if (radix_enabled()) { + ida_free(&mmu_context_ida, ctx->id); + } else { +#ifdef CONFIG_PPC_64S_HASH_MMU + int index, context_id; - for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { - context_id = ctx->extended_id[index]; - if (context_id) - ida_free(&mmu_context_ida, context_id); + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { + context_id = ctx->extended_id[index]; + if (context_id) + ida_free(&mmu_context_ida, context_id); + } + kfree(ctx->hash_context); +#else + BUILD_BUG(); // radix_enabled() should be constant true +#endif } - kfree(ctx->hash_context); } static void pmd_frag_destroy(void *pmd_frag) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 9e16c7b1a6c5..79ce3c22a29d 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -22,6 +22,13 @@ #include "internal.h" +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +int mmu_vmemmap_psize = MMU_PAGE_4K; +#endif + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -207,17 +214,12 @@ void __init mmu_partition_table_init(void) unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; unsigned long ptcr; - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); /* Initialize the Partition Table with no entries */ partition_tb = memblock_alloc(patb_size, patb_size); if (!partition_tb) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, patb_size, patb_size); - /* - * update partition table control register, - * 64 K size. - */ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); set_ptcr_when_no_uv(ptcr); powernv_set_nmmu_ptcr(ptcr); @@ -526,3 +528,23 @@ static int __init pgtable_debugfs_setup(void) return 0; } arch_initcall(pgtable_debugfs_setup); + +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN) +/* + * Override the generic version in mm/memremap.c. 
+ * + * With hash translation, the direct-map range is mapped with just one + * page size selected by htab_init_page_sizes(). Consult + * mmu_psize_defs[] to determine the minimum page size alignment. +*/ +unsigned long memremap_compat_align(void) +{ + if (!radix_enabled()) { + unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; + return max(SUBSECTION_SIZE, 1UL << shift); + } + + return SUBSECTION_SIZE; +} +EXPORT_SYMBOL_GPL(memremap_compat_align); +#endif diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index a2d9ad138709..753e62ba67af 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -66,7 +66,7 @@ static int __init dt_scan_storage_keys(unsigned long node, return 1; } -static int scan_pkey_feature(void) +static int __init scan_pkey_feature(void) { int ret; int pkeys_total = 0; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3a600bd7fbc6..def04631a74d 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -33,7 +33,6 @@ #include <trace/events/thp.h> -unsigned int mmu_pid_bits; unsigned int mmu_base_pid; unsigned long radix_mem_block_size __ro_after_init; @@ -335,7 +334,7 @@ static void __init radix_init_pgtable(void) u64 i; /* We don't support slb for radix */ - mmu_slb_size = 0; + slb_set_size(0); /* * Create the linear mapping @@ -357,18 +356,13 @@ static void __init radix_init_pgtable(void) -1, PAGE_KERNEL)); } - /* Find out how many PID bits are supported */ if (!cpu_has_feature(CPU_FTR_HVMODE) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { /* * Older versions of KVM on these machines perfer if the * guest only uses the low 19 PID bits. */ - if (!mmu_pid_bits) - mmu_pid_bits = 19; - } else { - if (!mmu_pid_bits) - mmu_pid_bits = 20; + mmu_pid_bits = 19; } mmu_base_pid = 1; @@ -449,11 +443,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - /* Find MMU PID size */ - prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); - if (prop && size == 4) - mmu_pid_bits = be32_to_cpup(prop); - /* Grab page size encodings */ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); if (!prop) @@ -510,7 +499,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, return 1; } -static unsigned long radix_memory_block_size(void) +static unsigned long __init radix_memory_block_size(void) { unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; @@ -528,7 +517,7 @@ static unsigned long radix_memory_block_size(void) #else /* CONFIG_MEMORY_HOTPLUG */ -static unsigned long radix_memory_block_size(void) +static unsigned long __init radix_memory_block_size(void) { return 1UL * 1024 * 1024 * 1024; } @@ -572,22 +561,11 @@ void __init radix__early_init_devtree(void) return; } -static void radix_init_amor(void) -{ - /* - * In HV mode, we init AMOR (Authority Mask Override Register) so that - * the hypervisor and guest can setup IAMR (Instruction Authority Mask - * Register), enable key 0 and set it to 1. - * - * AMOR = 0b1100 .... 
0000 (Mask for key 0 is 11) - */ - mtspr(SPRN_AMOR, (3ul << 62)); -} - void __init radix__early_init_mmu(void) { unsigned long lpcr; +#ifdef CONFIG_PPC_64S_HASH_MMU #ifdef CONFIG_PPC_64K_PAGES /* PAGE_SIZE mappings */ mmu_virtual_psize = MMU_PAGE_64K; @@ -605,6 +583,7 @@ void __init radix__early_init_mmu(void) } else mmu_vmemmap_psize = mmu_virtual_psize; #endif +#endif /* * initialize page table size */ @@ -644,7 +623,6 @@ void __init radix__early_init_mmu(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); - radix_init_amor(); } else { radix_init_pseries(); } @@ -668,8 +646,6 @@ void radix__early_init_mmu_secondary(void) set_ptcr_when_no_uv(__pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); - - radix_init_amor(); } radix__switch_mmu_context(NULL, &init_mm); @@ -1100,7 +1076,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) int pud_clear_huge(pud_t *pud) { - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { pud_clear(pud); return 1; } @@ -1147,7 +1123,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); return 1; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index f0037bcc47a0..31f4cef3adac 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -868,19 +868,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) return err; } } - -DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) -{ - int err = regs->result; - - if (err == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); - else - bad_page_fault(regs, SIGSEGV); - } else if (err == -EINVAL) { - unrecoverable_exception(regs); - } else { - BUG(); - } -} diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c new file mode 100644 index 000000000000..b86e7b906257 --- /dev/null +++ b/arch/powerpc/mm/book3s64/trace.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is for defining trace points and trace related helpers. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define CREATE_TRACE_POINTS +#include <trace/events/thp.h> +#endif diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 8acd00178956..c1cb21a00884 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -82,6 +82,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(copro_handle_mm_fault); +#ifdef CONFIG_PPC_64S_HASH_MMU int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { u64 vsid, vsidkey; @@ -146,3 +147,4 @@ void copro_flush_all_slbs(struct mm_struct *mm) cxl_slbia(mm); } EXPORT_SYMBOL_GPL(copro_flush_all_slbs); +#endif diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a8d0ce85d39a..2d4a411c7c85 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,6 +35,7 @@ #include <linux/kfence.h> #include <linux/pkeys.h> +#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/page.h> @@ -620,4 +621,27 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) { bad_page_fault(regs, SIGSEGV); } + +/* + * In radix, segment interrupts indicate the EA is not addressable by the + * page table geometry, so they are always sent here. + * + * In hash, this is called if do_slb_fault returns error. Typically it is + * because the EA was outside the region allowed by software. 
+ */ +DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt) +{ + int err = regs->result; + + if (err == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); + else + bad_page_fault(regs, SIGSEGV); + } else if (err == -EINVAL) { + unrecoverable_exception(regs); + } else { + BUG(); + } +} #endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 82d8b368ca6d..ddead41e2194 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -542,20 +542,26 @@ retry: return page; } -#ifdef CONFIG_PPC_MM_SLICES +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static inline int file_to_psize(struct file *file) +{ + struct hstate *hstate = hstate_file(file); + return shift_to_mmu_psize(huge_page_shift(hstate)); +} + unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - struct hstate *hstate = hstate_file(file); - int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - #ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); #endif - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1); +#endif + BUG(); } #endif diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 3a82f89827a5..119ef491f797 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -20,6 +20,7 @@ #include <linux/pgtable.h> #include <asm/pgalloc.h> #include <asm/kup.h> +#include <asm/smp.h> phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; EXPORT_SYMBOL_GPL(memstart_addr); @@ -33,6 +34,9 @@ bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + return 0; + disable_kuep = true; pr_warn("Disabling Kernel Userspace Execution Prevention\n"); return 0; @@ -47,6 +51,23 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); +void __weak setup_kuep(bool disabled) +{ + if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled) + return; + + if (smp_processor_id() != boot_cpuid) + return; + + pr_info("Activating Kernel Userspace Execution Prevention\n"); +} + +void setup_kup(void) +{ + setup_kuap(disable_kuap); + setup_kuep(disable_kuep); +} + #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 386be136026e..35f46bf54281 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -370,6 +370,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, #endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_BOOK3S_64 +unsigned int mmu_lpid_bits; +unsigned int mmu_pid_bits; + static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); static int __init parse_disable_radix(char *p) @@ -437,11 +440,56 @@ static void __init early_check_vec5(void) } } +static int __init dt_scan_mmu_pid_width(unsigned long node, + const char *uname, int depth, + void *data) +{ + int size = 0; + const __be32 *prop; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + /* Find MMU LPID, PID register size */ + prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size); + if (prop && size == 4) + 
mmu_lpid_bits = be32_to_cpup(prop); + + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); + if (prop && size == 4) + mmu_pid_bits = be32_to_cpup(prop); + + if (!mmu_pid_bits && !mmu_lpid_bits) + return 0; + + return 1; +} + void __init mmu_early_init_devtree(void) { + bool hvmode = !!(mfmsr() & MSR_HV); + /* Disable radix mode based on kernel command line. */ - if (disable_radix) - cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + if (disable_radix) { + if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + else + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + + of_scan_flat_dt(dt_scan_mmu_pid_width, NULL); + if (hvmode && !mmu_lpid_bits) { + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + mmu_lpid_bits = 12; /* POWER8-10 */ + else + mmu_lpid_bits = 10; /* POWER7 */ + } + if (!mmu_pid_bits) { + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + mmu_pid_bits = 20; /* POWER9-10 */ + } /* * Check /chosen/ibm,architecture-vec-5 if running as a guest. @@ -449,11 +497,12 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (!(mfmsr() & MSR_HV)) + if (!hvmode) early_check_vec5(); if (early_radix_enabled()) { radix__early_init_devtree(); + /* * We have finalized the translation we are going to use by now. * Radix mode is not limited by RMA / VRMA addressing. @@ -463,5 +512,9 @@ void __init mmu_early_init_devtree(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } else hash__early_init_devtree(); + + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && + !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) + panic("kernel does not support any MMU type offered by platform"); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 57342154d2b0..4f12504fb405 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -98,23 +98,3 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, return NULL; } - -#ifdef CONFIG_ZONE_DEVICE -/* - * Override the generic version in mm/memremap.c. - * - * With hash translation, the direct-map range is mapped with just one - * page size selected by htab_init_page_sizes(). Consult - * mmu_psize_defs[] to determine the minimum page size alignment. -*/ -unsigned long memremap_compat_align(void) -{ - unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; - - if (radix_enabled()) - return SUBSECTION_SIZE; - return max(SUBSECTION_SIZE, 1UL << shift); - -} -EXPORT_SYMBOL_GPL(memremap_compat_align); -#endif diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 202bd260a009..35b287b0a8da 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -19,7 +19,8 @@ int __init kasan_init_region(void *start, size_t size) block = memblock_alloc(k_size, k_size_base); if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); + int shift = ffs(k_size - k_size_base); + int k_size_more = shift ? 
1 << (shift - 1) : 0; setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); if (k_size_more >= SZ_128K) diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index aad7c47e0030..ea821d0ffe16 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } - -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index bd5d91a31183..8e301cd8925b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,7 +26,6 @@ #include <mm/mmu_decl.h> unsigned long long memory_limit; -bool init_mem_is_free; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -312,7 +311,6 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); - init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index ae683fdc716c..c475cf810aa8 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -80,6 +80,7 @@ static inline unsigned long mmap_base(unsigned long rnd, return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd); } +#ifdef HAVE_ARCH_UNMAPPED_AREA #ifdef CONFIG_PPC_RADIX_MMU /* * Same function as generic code used only for radix, because we don't need to overload @@ -181,11 +182,42 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, */ return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags); } +#endif + +unsigned long arch_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr, len, flags, + mm_ctx_user_psize(¤t->mm->context), 0); +#else + BUG(); +#endif +} + +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr0, len, flags, + mm_ctx_user_psize(¤t->mm->context), 1); +#else + BUG(); +#endif +} +#endif /* HAVE_ARCH_UNMAPPED_AREA */ static void radix__arch_pick_mmap_layout(struct mm_struct *mm, unsigned long random_factor, struct rlimit *rlim_stack) { +#ifdef CONFIG_PPC_RADIX_MMU if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = radix__arch_get_unmapped_area; @@ -193,13 +225,9 @@ static void radix__arch_pick_mmap_layout(struct mm_struct *mm, mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown; } -} -#else -/* dummy */ -extern void radix__arch_pick_mmap_layout(struct mm_struct *mm, - unsigned long random_factor, - struct rlimit *rlim_stack); #endif +} + /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 74246536b832..1fb9c99f8679 100644 
--- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -18,6 +18,12 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, { /* 32-bit keeps track of the current PGDIR in the thread struct */ tsk->thread.pgdir = mm->pgd; +#ifdef CONFIG_PPC_BOOK3S_32 + tsk->thread.sr0 = mm->context.sr0; +#endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + tsk->thread.pid = mm->context.id; +#endif } #elif defined(CONFIG_PPC_BOOK3E_64) static inline void switch_mm_pgdir(struct task_struct *tsk, @@ -25,6 +31,9 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, { /* 64-bit Book3E keeps track of current PGD in the PACA */ get_paca()->pgd = mm->pgd; +#ifdef CONFIG_PPC_KUAP + tsk->thread.pid = mm->context.id; +#endif } #else static inline void switch_mm_pgdir(struct task_struct *tsk, @@ -81,7 +90,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (!new_on_cpu) membarrier_arch_switch_mm(prev, next, tsk); diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index e079f26b267e..1beae802bb1c 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -38,7 +38,7 @@ int icache_44x_need_flush; unsigned long tlb_47x_boltmap[1024/8]; -static void ppc44x_update_tlb_hwater(void) +static void __init ppc44x_update_tlb_hwater(void) { /* The TLB miss handlers hard codes the watermark in a cmpli * instruction to improve performances rather than loading it @@ -122,7 +122,7 @@ static void __init ppc47x_update_boltmap(void) /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU */ -static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys) { unsigned int rA; int bolted; @@ -240,19 +240,3 @@ void __init mmu_init_secondary(int cpu) } } #endif /* CONFIG_SMP */ - -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (smp_processor_id() != boot_cpuid) - return; - - if (disabled) - patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); - else - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - if (IS_ENABLED(CONFIG_PPC_47x) && disabled) - patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); -} -#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 0df9fe29dd56..27f9186ae374 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -8,11 +8,7 @@ */ #include <linux/memblock.h> -#include <linux/mmu_context.h> #include <linux/hugetlb.h> -#include <asm/fixmap.h> -#include <asm/code-patching.h> -#include <asm/inst.h> #include <mm/mmu_decl.h> @@ -212,35 +208,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - if (disabled) - return; - - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - mtspr(SPRN_MI_AP, MI_APG_KUEP); -} -#endif - -#ifdef CONFIG_PPC_KUAP -struct static_key_false disable_kuap_key; -EXPORT_SYMBOL(disable_kuap_key); - -void __init setup_kuap(bool disabled) -{ - if (disabled) { - static_branch_enable(&disable_kuap_key); - return; - } - - pr_info("Activating Kernel Userspace Access Protection\n"); - - mtspr(SPRN_MD_AP, MD_APG_KUAP); -} -#endif - int pud_clear_huge(pud_t *pud) { return 0; diff --git 
a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index b1f630d423d8..b467a25ee155 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y += mmu_context.o tlb.o tlb_low.o +obj-y += mmu_context.o tlb.o tlb_low.o kup.o obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 77884e24281d..7d4368d055a6 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -10,6 +10,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/dma.h> +#include <asm/code-patching.h> #include <mm/mmu_decl.h> @@ -115,3 +116,17 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) smp_wmb(); return 0; } + +void __patch_exception(int exc, unsigned long addr) +{ + unsigned int *ibase = &interrupt_base_book3e; + + /* + * Our exceptions vectors start with a NOP and -then- a branch + * to deal with single stepping from userspace which stops on + * the second instruction. Thus we need to patch the second + * instruction of the exception, not the first one. + */ + + patch_branch(ibase + (exc / 4) + 1, addr, 0); +} diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index b231a54f540c..dfe715e0f70a 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -60,11 +60,6 @@ struct tlbcamrange { phys_addr_t phys; } tlbcam_addrs[NUM_TLBCAMS]; -unsigned long tlbcam_sz(int idx) -{ - return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; -} - #ifdef CONFIG_FSL_BOOKE /* * Return PA for this VA if it is mapped by a CAM, or 0 @@ -264,6 +259,11 @@ void __init MMU_init_hw(void) flush_instruction_cache(); } +static unsigned long __init tlbcam_sz(int idx) +{ + return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; +} + void __init adjust_total_lowmem(void) { unsigned long ram; diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c new file mode 100644 index 000000000000..552becf90e97 --- /dev/null +++ b/arch/powerpc/mm/nohash/kup.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file contains the routines for initializing kernel userspace protection + */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/printk.h> +#include <linux/smp.h> + +#include <asm/kup.h> +#include <asm/smp.h> + +#ifdef CONFIG_PPC_KUAP +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + +void setup_kuap(bool disabled) +{ + if (disabled) { + if (IS_ENABLED(CONFIG_40x)) + disable_kuep = true; + if (smp_processor_id() == boot_cpuid) + static_branch_enable(&disable_kuap_key); + return; + } + + pr_info("Activating Kernel Userspace Access Protection\n"); + + __prevent_user_access(KUAP_READ_WRITE); +} +#endif diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 44b2b5e7cabe..85b048f04c56 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -33,6 +33,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> #include <asm/smp.h> +#include <asm/kup.h> #include <mm/mmu_decl.h> @@ -217,7 +218,7 @@ static void set_context(unsigned long id, pgd_t *pgd) /* sync */ mb(); - } else { + } else if (kuap_is_disabled()) { if (IS_ENABLED(CONFIG_40x)) 
mb(); /* sync */ @@ -305,6 +306,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, if (IS_ENABLED(CONFIG_BDI_SWITCH)) abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + tsk->thread.pid = id; +#endif raw_spin_unlock(&context_lock); } diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 647bf454a0fa..fd2c77af5c55 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -150,7 +150,6 @@ static inline int mmu_get_tsize(int psize) */ #ifdef CONFIG_PPC64 -int mmu_linear_psize; /* Page size used for the linear mapping */ int mmu_pte_psize; /* Page size used for PTE pages */ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ @@ -433,7 +432,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) } } -static void setup_page_sizes(void) +static void __init setup_page_sizes(void) { unsigned int tlb0cfg; unsigned int tlb0ps; @@ -571,7 +570,7 @@ out: } } -static void setup_mmu_htw(void) +static void __init setup_mmu_htw(void) { /* * If we want to use HW tablewalk, enable it by patching the TLB miss @@ -657,14 +656,6 @@ static void early_init_this_mmu(void) static void __init early_init_mmu_global(void) { - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - /* XXX This should be decided at runtime based on supported * page sizes in the TLB, but for now let's assume 16M is * always there and a good fit (which it probably is) diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9235e720e357..8b97c4acfebf 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -128,6 +128,13 @@ END_BTB_FLUSH_SECTION bne tlb_miss_kernel_bolted +tlb_miss_user_bolted: +#ifdef CONFIG_PPC_KUAP + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- tlb_miss_fault_bolted /* KUAP fault */ +#endif + tlb_miss_common_bolted: /* * This is the guts of the TLB miss handler for bolted-linear. @@ -246,7 +253,7 @@ itlb_miss_fault_bolted: cmpldi cr0,r15,0 /* Check for user region */ oris r11,r11,_PAGE_ACCESSED@h - beq tlb_miss_common_bolted + beq tlb_miss_user_bolted b itlb_miss_kernel_bolted #ifdef CONFIG_PPC_FSL_BOOK3E @@ -676,6 +683,11 @@ finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault +#ifdef CONFIG_PPC_KUAP + mfspr r11,SPRN_MAS1 + rlwinm. 
r10,r11,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif /* Now we build the MAS: * @@ -689,15 +701,17 @@ finish_normal_tlb_miss: * * TODO: mix up code below for better scheduling */ - clrrdi r11,r16,12 /* Clear low crap in EA */ - rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ - mtspr SPRN_MAS2,r11 + clrrdi r10,r16,12 /* Clear low crap in EA */ + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + mtspr SPRN_MAS2,r10 /* Check page size, if not standard, update MAS1 */ - rldicl r11,r14,64-8,64-8 - cmpldi cr0,r11,BOOK3E_PAGESZ_4K + rldicl r10,r14,64-8,64-8 + cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f +#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 +#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 @@ -786,7 +800,16 @@ virt_page_table_tlb_miss: mfspr r10,SPRN_MAS1 rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 +#ifdef CONFIG_PPC_KUAP + b 2f +1: + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- virt_page_table_tlb_miss_fault /* KUAP fault */ +2: +#else 1: +#endif BEGIN_MMU_FTR_SECTION /* Search if we already have a TLB entry for that virtual address, and * if we do, bail out. @@ -1027,6 +1050,11 @@ virt_page_table_tlb_miss_whacko_fault: * avoid too much complication, it will save/restore things for us */ htw_tlb_miss: +#ifdef CONFIG_PPC_KUAP + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- htw_tlb_miss_fault /* KUAP fault */ +#endif /* Search if we already have a TLB entry for that virtual address, and * if we do, bail out. * diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 59d3cfcd7887..9d5f710d2c20 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,7 +134,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -static void reset_numa_cpu_lookup_table(void) +static void __init reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -372,7 +372,7 @@ void update_numa_distance(struct device_node *node) * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... 
N elements} */ -static void initialize_form2_numa_distance_lookup_table(void) +static void __init initialize_form2_numa_distance_lookup_table(void) { int i, j; struct device_node *root; @@ -581,7 +581,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) return 0; } -static int get_nid_and_numa_distance(struct drmem_lmb *lmb) +static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; int default_nid = NUMA_NO_NODE; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ce9482383144..abb3198bd277 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -81,9 +81,6 @@ static struct page *maybe_pte_to_page(pte_t pte) static pte_t set_pte_filter_hash(pte_t pte) { - if (radix_enabled()) - return pte; - pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || cpu_has_feature(CPU_FTR_NOEXECUTE))) { @@ -112,6 +109,9 @@ static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; + if (radix_enabled()) + return pte; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) return set_pte_filter_hash(pte); @@ -144,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, { struct page *pg; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + return pte; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) return pte; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 78c8cf01db5f..175aabf101e8 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift); struct page *p4d_page(p4d_t p4d) { if (p4d_is_leaf(p4d)) { - VM_WARN_ON(!p4d_huge(p4d)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!p4d_huge(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_pgtable(p4d)); @@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d) struct page *pud_page(pud_t pud) { if (pud_is_leaf(pud)) { - VM_WARN_ON(!pud_huge(pud)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_pgtable(pud)); @@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud) struct page *pmd_page(pmd_t pmd) { if (pmd_is_leaf(pmd)) { - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); + /* + * vmalloc_to_page may be called on any vmap address (not only + * vmalloc), and it uses pmd_page() etc., when huge vmap is + * enabled so these checks can't be used. 
+ */ + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile index 4050cbb55acf..b533caaf0910 100644 --- a/arch/powerpc/mm/ptdump/Makefile +++ b/arch/powerpc/mm/ptdump/Makefile @@ -10,5 +10,5 @@ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o ifdef CONFIG_PTDUMP_DEBUGFS obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o -obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o +obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 32bfb215c485..8c846982766f 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -123,7 +123,7 @@ static struct ptdump_range ptdump_range[] __ro_after_init = { void pt_dump_size(struct seq_file *m, unsigned long size) { - static const char units[] = "KMGTPE"; + static const char units[] = " KMGTPE"; const char *unit = units; /* Work out what appropriate unit to use */ @@ -176,7 +176,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr) pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); - pt_dump_size(st->seq, (addr - st->start_address) >> 10); + pt_dump_size(st->seq, addr - st->start_address); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -315,7 +315,7 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); -static void build_pgtable_complete_mask(void) +static void __init build_pgtable_complete_mask(void) { unsigned int i, j; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 82b45b1cb973..f42711f865f3 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -639,26 +639,6 @@ return_addr: } EXPORT_SYMBOL_GPL(slice_get_unmapped_area); -unsigned long arch_get_unmapped_area(struct file *filp, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags) -{ - return slice_get_unmapped_area(addr, len, flags, - mm_ctx_user_psize(¤t->mm->context), 0); -} - -unsigned long arch_get_unmapped_area_topdown(struct file *filp, - const unsigned long addr0, - const unsigned long len, - const unsigned long pgoff, - const unsigned long flags) -{ - return slice_get_unmapped_area(addr0, len, flags, - mm_ctx_user_psize(¤t->mm->context), 1); -} - unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 7e9b978b768e..b20a2a83a6e7 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -31,7 +31,7 @@ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ - EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) /* blr; (unconditional 'branch' with link) to absolute address */ @@ -125,8 +125,7 @@ #define COND_LE (CR0_GT | COND_CMP_FALSE) #define SEEN_FUNC 0x20000000 /* might call external helpers */ -#define SEEN_STACK 0x40000000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ +#define SEEN_TAILCALL 0x40000000 /* uses tail calls */ #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ @@ -151,8 +150,15 @@ struct codegen_context { unsigned int idx; 
unsigned int stack_size; int b2p[ARRAY_SIZE(b2p)]; + unsigned int exentry_idx; }; +#ifdef CONFIG_PPC32 +#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ +#else +#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ +#endif + static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); /* smp write barrier */ @@ -176,11 +182,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass); + u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 90ce75f0f1e2..d6ffdd0f2309 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; bool extra_pass = false; + u32 extable_len; + u32 fixup_len; if (!fp->jit_requested) return org_fp; @@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) image = jit_data->image; bpf_hdr = jit_data->header; proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; extra_pass = true; goto skip_init_ctx; } @@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; goto out_addrs; } @@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_jit_build_prologue(0, &cgctx); bpf_jit_build_epilogue(0, &cgctx); + fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; + extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); + proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; + alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); if (!bpf_hdr) { @@ -186,6 +190,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto out_addrs; } + if (extable_len) + fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; + skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); @@ -210,7 +217,7 @@ skip_init_ctx: /* Now build the prologue, body code & epilogue for real. 
*/ cgctx.idx = 0; bpf_jit_build_prologue(code_base, &cgctx); - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); fp = org_fp; goto out_addrs; @@ -238,7 +245,7 @@ skip_codegen_passes: fp->bpf_func = (void *)image; fp->jited = 1; - fp->jited_len = alloclen; + fp->jited_len = proglen + FUNCTION_DESCR_SIZE; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); if (!fp->is_func || extra_pass) { @@ -262,3 +269,52 @@ out: return fp; } + +/* + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling + * this function, as this only applies to BPF_PROBE_MEM, for now. + */ +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + u32 *fixup; + + /* Populate extable entries only in the last pass */ + if (pass != 2) + return 0; + + if (!fp->aux->extable || + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) + return -EINVAL; + + pc = (unsigned long)&image[insn_idx]; + + fixup = (void *)fp->aux->extable - + (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); + + fixup[0] = PPC_RAW_LI(dst_reg, 0); + if (IS_ENABLED(CONFIG_PPC32)) + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ + + fixup[BPF_FIXUP_LEN - 1] = + PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); + + ex = &fp->aux->extable[ctx->exentry_idx]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = offset; + + offset = (long)fixup - (long)&ex->fixup; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->fixup = offset; + + ctx->exentry_idx++; + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 8a4faa05f9e4..faaebd446cad 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); u32 src_reg_h = src_reg - 1; u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -812,23 +813,91 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); 
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
- EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+ * load only if addr is kernel address (see is_kernel_addr()), otherwise
+ * set dst_reg=0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ PPC_LI32(_R0, TASK_SIZE - off);
+ EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+ PPC_BCC(COND_GT, (ctx->idx + 5) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * For BPF_DW case, "li reg_h,0" would be needed when
+ * !fp->aux->verifier_zext. Emit NOP otherwise.
+ *
+ * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+ * if necessary. So, jump there instead of emitting an
+ * additional "li reg_h,0" instruction.
+ */
+ if (size == BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else
+ EMIT(PPC_RAW_NOP());
+ /*
+ * Need to jump two instructions instead of one for BPF_DW case
+ * as there are two load instructions for dst_reg_h & dst_reg
+ * respectively.
+ */
+ if (size == BPF_DW)
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+ }
+
+ if (size != BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ int insn_idx = ctx->idx - 1;
+ int jmp_off = 4;
+
+ /*
+ * In case of BPF_DW, two lwz instructions are emitted, one
+ * for higher 32-bit and another for lower 32-bit. So, set
+ * ex->insn to the first of the two and jump over both
+ * instructions in fixup.
+ *
+ * Similarly, with !verifier_zext, two instructions are
+ * emitted for BPF_B/H/W case. So, set ex->insn to the
+ * instruction that could fault and skip over both
+ * instructions.
+ */ + if (size == BPF_DW || !fp->aux->verifier_zext) { + insn_idx -= 1; + jmp_off += 4; + } + + ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, + jmp_off, dst_reg); + if (ret) + return ret; + } break; /* @@ -862,7 +931,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8571aafcc9e1..9eae8d8ed340 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -297,7 +297,7 @@ asm ( /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; @@ -311,6 +311,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 code = insn[i].code; u32 dst_reg = b2p[insn[i].dst_reg]; u32 src_reg = b2p[insn[i].src_reg]; + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -778,25 +779,66 @@ emit_clear: */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM + * load only if addr is kernel address (see is_kernel_addr()), otherwise + * set dst_reg=0 and move on. + */ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) + PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + else /* BOOK3S_64 */ + PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); + PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* + * Check if 'off' is word aligned because PPC_BPF_LL() + * (BPF_DW case) generates two instructions if 'off' is not + * word-aligned and one instruction otherwise. 
+ */ + if (BPF_SIZE(code) == BPF_DW && (off & 3)) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + PPC_BPF_LL(dst_reg, src_reg, off); + break; + } + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, + 4, dst_reg); + if (ret) + return ret; + } break; /* @@ -831,7 +873,7 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index f970d1510d3d..4738c4dbf567 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,7 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { - struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); + ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); mpc8xx_pmu_read(event); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 73e62e9b179b..a684901b6965 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/hw_irq.h> #include <asm/interrupt.h> #ifdef CONFIG_PPC64 @@ -857,6 +858,19 @@ static void write_pmc(int idx, unsigned long val) } } +static int any_pmc_overflown(struct cpu_hw_events *cpuhw) +{ + int i, idx; + + for (i = 0; i < cpuhw->n_events; i++) { + idx = cpuhw->event[i]->hw.idx; + if ((idx) && ((int)read_pmc(idx) < 0)) + return idx; + } + + return 0; +} + /* Called from sysrq_handle_showregs() */ void perf_event_print_debug(void) { @@ -1281,11 +1295,13 @@ static void power_pmu_disable(struct pmu *pmu) /* * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 + * Also clear PMXE to disable PMI's getting triggered in some + * corner cases during PMU disable. */ val = mmcr0 = mfspr(SPRN_MMCR0); val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | - MMCR0_FC56); + MMCR0_PMXE | MMCR0_FC56); /* Set mmcr0 PMCCEXT for p10 */ if (ppmu->flags & PPMU_ARCH_31) val |= MMCR0_PMCCEXT; @@ -1299,6 +1315,23 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + /* + * Some corner cases could clear the PMU counter overflow + * while a masked PMI is pending. One such case is when + * a PMI happens during interrupt replay and perf counter + * values are cleared by PMU callbacks before replay. + * + * If any PMC corresponding to the active PMU events are + * overflown, disable the interrupt by clearing the paca + * bit for PMI since we are disabling the PMU now. + * Otherwise provide a warning if there is PMI pending, but + * no counter is found overflown. + */ + if (any_pmc_overflown(cpuhw)) + clear_pmi_irq_pending(); + else + WARN_ON(pmi_irq_pending()); + val = mmcra = cpuhw->mmcr.mmcra; /* @@ -1390,6 +1423,15 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). 
 */
 if (!cpuhw->n_added) {
+ /*
+ * If there is any active event with an overflown PMC
+ * value, set back PACA_IRQ_PMI which would have been
+ * cleared in power_pmu_disable().
+ */
+ hard_irq_disable();
+ if (any_pmc_overflown(cpuhw))
+ set_pmi_irq_pending();
+
 mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
 mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
 if (ppmu->flags & PPMU_ARCH_31)
@@ -2337,6 +2379,14 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 break;
 }
 }
+
+ /*
+ * Clear PACA_IRQ_PMI in case it was set by
+ * set_pmi_irq_pending() when PMU was enabled
+ * after accounting for interrupts.
+ */
+ clear_pmi_irq_pending();
+
 if (!active)
 /* reset non active counters that have overflowed */
 write_pmc(i + 1, 0);
@@ -2356,6 +2406,13 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 }
 }
 }
+
+ /*
+ * During system wide profiling or while a specific CPU is monitored for an
+ * event, some corner cases could cause PMC to overflow in idle path. This
+ * will trigger a PMI after waking up from idle. Since counter values are _not_
+ * saved/restored in idle path, can lead to below "Can't find PMC" message.
+ */
 if (unlikely(!found) && !arch_irq_disabled_regs(regs))
 printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
@@ -2381,6 +2438,36 @@ static void perf_event_interrupt(struct pt_regs *regs)
 perf_sample_event_took(sched_clock() - start_clock);
 }
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */ + + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} + static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -2392,7 +2479,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) return 0; } -int register_power_pmu(struct power_pmu *pmu) +int __init register_power_pmu(struct power_pmu *pmu) { if (ppmu) return -EBUSY; /* something's already registered */ @@ -2419,8 +2506,24 @@ int register_power_pmu(struct power_pmu *pmu) } #ifdef CONFIG_PPC64 +static bool pmu_override = false; +static unsigned long pmu_override_val; +static void do_pmu_override(void *data) +{ + ppc_set_pmu_inuse(1); + if (pmu_override_val) + mtspr(SPRN_MMCR1, pmu_override_val); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); +} + static int __init init_ppc64_pmu(void) { + if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) { + pr_warn("disabling perf due to pmu_override= command line option.\n"); + on_each_cpu(do_pmu_override, NULL, 1); + return 0; + } + /* run through all the pmu drivers one at a time */ if (!init_power5_pmu()) return 0; @@ -2442,4 +2545,23 @@ static int __init init_ppc64_pmu(void) return init_generic_compat_pmu(); } early_initcall(init_ppc64_pmu); + +static int __init pmu_setup(char *str) +{ + unsigned long val; + + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + pmu_override = true; + + if (kstrtoul(str, 0, &val)) + val = 0; + + pmu_override_val = val; + + return 1; +} +__setup("pmu_override=", pmu_setup); + #endif diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 695975227e60..b6e25f75109d 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -307,7 +307,7 @@ static struct power_pmu generic_compat_pmu = { .attr_groups = generic_compat_pmu_attr_groups, }; -int init_generic_compat_pmu(void) +int __init init_generic_compat_pmu(void) { int rc = 0; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1816f560a465..1e8aa934e37e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -756,7 +756,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, } if (calc_ev_end > ev_end) { - pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", + pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", event_idx, event, ev_end, offset, calc_ev_end); return -1; } diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index 80bbf72bfec2..4c18b5504326 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -2,12 +2,12 @@ // // Copyright 2019 Madhavan Srinivasan, IBM Corporation. 
-extern int init_ppc970_pmu(void); -extern int init_power5_pmu(void); -extern int init_power5p_pmu(void); -extern int init_power6_pmu(void); -extern int init_power7_pmu(void); -extern int init_power8_pmu(void); -extern int init_power9_pmu(void); -extern int init_power10_pmu(void); -extern int init_generic_compat_pmu(void); +int __init init_ppc970_pmu(void); +int __init init_power5_pmu(void); +int __init init_power5p_pmu(void); +int __init init_power6_pmu(void); +int __init init_power7_pmu(void); +int __init init_power8_pmu(void); +int __init init_power9_pmu(void); +int __init init_power10_pmu(void); +int __init init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 7ea873ab2e6f..4037ea652522 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -220,22 +220,37 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) /* Nothing to do */ break; case 1: - ret = PH(LVL, L1); + ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); break; case 2: - ret = PH(LVL, L2); + ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); break; case 3: - ret = PH(LVL, L3); + ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = P(SNOOP, HIT); + + if (sub_idx == 1) + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); + else if (sub_idx == 2 || sub_idx == 3) + ret |= P(LVL, HIT) | LEVEL(PMEM); + else if (sub_idx == 4) + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); + else if (sub_idx == 5 || sub_idx == 7) + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; + else if (sub_idx == 6) + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); + } else { + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); + } break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - ret = PH(LVL, REM_CCE2); - if ((sub_idx == 0) || (sub_idx == 2)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (sub_idx == 0) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 2); + else if (sub_idx == 1) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 2); + else if (sub_idx == 2) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 3); + else if (sub_idx == 3) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 3); + } else { + ret = PH(LVL, REM_CCE2); + if (sub_idx == 0 || sub_idx == 2) + ret |= P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 3) + ret |= P(SNOOP, HITM); + } break; case 7: ret = PM(LVL, L1); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 9dd75f385837..0975ad0b42c4 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -592,7 +592,7 @@ static struct power_pmu power10_pmu = { .check_attr_config = power10_check_attr_config, }; -int init_power10_pmu(void) +int __init init_power10_pmu(void) { unsigned int pvr; int rc; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 18732267993a..753b4740ef64 100644 --- 
a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = { .cache_events = &power5p_cache_events, }; -int init_power5p_pmu(void) +int __init init_power5p_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index cb611c1e7abe..1f83c4cba0aa 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = { .flags = PPMU_HAS_SSLOT, }; -int init_power5_pmu(void) +int __init init_power5_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 69ef38216418..aec746f86804 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -539,7 +539,7 @@ static struct power_pmu power6_pmu = { .cache_events = &power6_cache_events, }; -int init_power6_pmu(void) +int __init init_power6_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 894c17f9a762..99b5ba314ea7 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = { .cache_events = &power7_cache_events, }; -int init_power7_pmu(void) +int __init init_power7_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5282e8415ddf..f21194b5604a 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -378,7 +378,7 @@ static struct power_pmu power8_pmu = { .bhrb_nr = 32, }; -int init_power8_pmu(void) +int __init init_power8_pmu(void) { int rc; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index ff3382140d7e..4b7c17e36100 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -452,7 +452,7 @@ static struct power_pmu power9_pmu = { .check_attr_config = power9_check_attr_config, }; -int init_power9_pmu(void) +int __init init_power9_pmu(void) { int rc = 0; unsigned int pvr = mfspr(SPRN_PVR); diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 1f8263785286..09802482ba72 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -489,7 +489,7 @@ static struct power_pmu ppc970_pmu = { .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, }; -int init_ppc970_pmu(void) +int __init init_ppc970_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 823397c802de..af13a59d2f60 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -197,7 +197,7 @@ static irqreturn_t rst_wrn_handler(int irq, void *data) { } } -static void node_irq_request(const char *compat, irq_handler_t errirq_handler) +static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler) { struct device_node *np; unsigned int irq; @@ -222,7 +222,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) } } -static void critical_irq_setup(void) +static void __init 
critical_irq_setup(void) { node_irq_request(FSP2_CMU_ERR, cmu_err_handler); node_irq_request(FSP2_BUS_ERR, bus_err_handler); diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c9202..2571841625a2 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -163,7 +163,7 @@ static ssize_t cpm_idle_store(struct kobject *kobj, static struct kobj_attribute cpm_idle_attr = __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store); -static void cpm_idle_config_sysfs(void) +static void __init cpm_idle_config_sysfs(void) { struct device *dev; unsigned long ret; @@ -231,7 +231,7 @@ static const struct platform_suspend_ops cpm_suspend_ops = { .enter = cpm_suspend_enter, }; -static int cpm_get_uint_property(struct device_node *np, +static int __init cpm_get_uint_property(struct device_node *np, const char *name) { int len; diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index c13d64c3b019..24f41e178cbc 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1273,7 +1273,7 @@ static int __init ppc405ex_pciex_core_init(struct device_node *np) return 2; } -static void ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) +static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) { /* Assert the PE0_PHY reset */ mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000); diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index 30342b60aa63..0b03d812baae 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -97,7 +97,7 @@ static enum soc_type { MPC512x_SOC_MPC5125, } soc; -static void mpc512x_clk_determine_soc(void) +static void __init mpc512x_clk_determine_soc(void) { if (of_machine_is_compatible("fsl,mpc5121")) { soc = MPC512x_SOC_MPC5121; @@ -113,98 +113,98 @@ static void mpc512x_clk_determine_soc(void) } } -static bool soc_has_mbx(void) +static bool __init soc_has_mbx(void) { if (soc == MPC512x_SOC_MPC5121) return true; return false; } -static bool soc_has_axe(void) +static bool __init soc_has_axe(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_viu(void) +static bool __init soc_has_viu(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_spdif(void) +static bool __init soc_has_spdif(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_pata(void) +static bool __init soc_has_pata(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_sata(void) +static bool __init soc_has_sata(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_pci(void) +static bool __init soc_has_pci(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_fec2(void) +static bool __init soc_has_fec2(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static int soc_max_pscnum(void) +static int __init soc_max_pscnum(void) { if (soc == MPC512x_SOC_MPC5125) return 10; return 12; } -static bool soc_has_sdhc2(void) +static bool __init soc_has_sdhc2(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_nfc_5125(void) +static bool __init soc_has_nfc_5125(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_outclk(void) +static bool __init soc_has_outclk(void) { if (soc == 
MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_cpmf_0_bypass(void) +static bool __init soc_has_cpmf_0_bypass(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_mclk_mux0_canin(void) +static bool __init soc_has_mclk_mux0_canin(void) { if (soc == MPC512x_SOC_MPC5125) return true; @@ -294,7 +294,7 @@ static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len) } /* get the SPMF and translate it into the "sys pll" multiplier */ -static int get_spmf_mult(void) +static int __init get_spmf_mult(void) { static int spmf_to_mult[] = { 68, 1, 12, 16, 20, 24, 28, 32, @@ -312,7 +312,7 @@ static int get_spmf_mult(void) * values returned from here are a multiple of the real factor since the * divide ratio is fractional */ -static int get_sys_div_x2(void) +static int __init get_sys_div_x2(void) { static int sysdiv_code_to_x2[] = { 4, 5, 6, 7, 8, 9, 10, 14, @@ -333,7 +333,7 @@ static int get_sys_div_x2(void) * values returned from here are a multiple of the real factor since the * multiplier ratio is fractional */ -static int get_cpmf_mult_x2(void) +static int __init get_cpmf_mult_x2(void) { static int cpmf_to_mult_x36[] = { /* 0b000 is "times 36" */ @@ -379,7 +379,7 @@ static const struct clk_div_table divtab_1234[] = { { .div = 0, }, }; -static int get_freq_from_dt(char *propname) +static int __init get_freq_from_dt(char *propname) { struct device_node *np; const unsigned int *prop; @@ -396,7 +396,7 @@ static int get_freq_from_dt(char *propname) return val; } -static void mpc512x_clk_preset_data(void) +static void __init mpc512x_clk_preset_data(void) { size_t i; @@ -418,7 +418,7 @@ static void mpc512x_clk_preset_data(void) * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div * values */ -static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, +static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, int *sys_mul, int *sys_div, int *ips_div) { @@ -592,7 +592,7 @@ static struct mclk_setup_data mclk_outclk_data[] = { }; /* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ -static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) +static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) { size_t clks_idx_pub, clks_idx_int; u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */ @@ -701,7 +701,7 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) /* }}} MCLK helpers */ -static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) +static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) { int sys_mul, sys_div, ips_div; int mul, div; @@ -937,7 +937,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) * registers the set of public clocks (those listed in the dt-bindings/ * header file) for OF lookups, keeps the intermediates private to us */ -static void mpc5121_clk_register_of_provider(struct device_node *np) +static void __init mpc5121_clk_register_of_provider(struct device_node *np) { clk_data.clks = clks; clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */ @@ -948,7 +948,7 @@ static void mpc5121_clk_register_of_provider(struct device_node *np) * temporary support for the period of time between introduction of CCF * support and the adjustment of peripheral drivers to OF based lookups */ -static void mpc5121_clk_provide_migration_support(void) +static void __init 
mpc5121_clk_provide_migration_support(void) { /* @@ -1009,7 +1009,7 @@ static void mpc5121_clk_provide_migration_support(void) * case of not yet adjusted device tree data, where clock related specs * are missing) */ -static void mpc5121_clk_provide_backwards_compat(void) +static void __init mpc5121_clk_provide_backwards_compat(void) { enum did_reg_flags { DID_REG_PSC = BIT(0), diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index fff225901e2f..2f3c60e373e1 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -12,8 +12,8 @@ extern void __init mpc512x_init_early(void); extern void __init mpc512x_init(void); extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); -extern const char *mpc512x_select_psc_compat(void); -extern const char *mpc512x_select_reset_compat(void); +const char *__init mpc512x_select_psc_compat(void); +const char *__init mpc512x_select_reset_compat(void); extern void __noreturn mpc512x_restart(char *cmd); #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 7a9ae9591d60..e3411663edad 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -352,7 +352,7 @@ static void __init mpc512x_declare_of_platform_devices(void) #define DEFAULT_FIFO_SIZE 16 -const char *mpc512x_select_psc_compat(void) +const char *__init mpc512x_select_psc_compat(void) { if (of_machine_is_compatible("fsl,mpc5121")) return "fsl,mpc5121-psc"; @@ -363,7 +363,7 @@ const char *mpc512x_select_psc_compat(void) return NULL; } -const char *mpc512x_select_reset_compat(void) +const char *__init mpc512x_select_reset_compat(void) { if (of_machine_is_compatible("fsl,mpc5121")) return "fsl,mpc5121-reset"; diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index 99d60acc20c8..b72ed2950ca8 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -34,7 +34,7 @@ config PPC_EFIKA bool "bPlan Efika 5k2. 
MPC5200B based computer" depends on PPC_MPC52xx select PPC_RTAS - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE config PPC_LITE5200 bool "Freescale Lite5200 Eval Board" diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 108e1e4d2683..d9eed0decb28 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -39,7 +39,7 @@ #define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */ -static void quirk_mpc8360e_qe_enet10(void) +static void __init quirk_mpc8360e_qe_enet10(void) { /* * handle mpc8360E Erratum QE_ENET10: diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 6d91bdce0a18..0713deffb40c 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -35,7 +35,7 @@ #include "mpc83xx.h" #define BCSR5_INT_USB 0x02 -static int mpc834xemds_usb_cfg(void) +static int __init mpc834xemds_usb_cfg(void) { struct device_node *np; void __iomem *bcsr_regs = NULL; diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index f28d166ea7db..fc88ab97f6e3 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -23,7 +23,7 @@ #define BCSR12_USB_SER_PIN 0x80 #define BCSR12_USB_SER_DEVICE 0x02 -static int mpc837xmds_usb_cfg(void) +static int __init mpc837xmds_usb_cfg(void) { struct device_node *np; const void *phy_type, *mode; diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 7fb7684c256b..5d48c6842098 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -18,7 +18,7 @@ #include "mpc83xx.h" -static void mpc837x_rdb_sd_cfg(void) +static void __init mpc837x_rdb_sd_cfg(void) { void __iomem *im; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index a30d30588cf6..aea803ba3a15 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -68,9 +68,9 @@ extern void __noreturn mpc83xx_restart(char *cmd); extern long mpc83xx_time_init(void); -extern int mpc837x_usb_cfg(void); -extern int mpc834x_usb_cfg(void); -extern int mpc831x_usb_cfg(void); +int __init mpc837x_usb_cfg(void); +int __init mpc834x_usb_cfg(void); +int __init mpc831x_usb_cfg(void); extern void mpc83xx_ipic_init_IRQ(void); #ifdef CONFIG_PCI diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 3d247d726ed5..b0bda20aaccf 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -20,7 +20,7 @@ #ifdef CONFIG_PPC_MPC834x -int mpc834x_usb_cfg(void) +int __init mpc834x_usb_cfg(void) { unsigned long sccr, sicrl, sicrh; void __iomem *immap; @@ -96,7 +96,7 @@ int mpc834x_usb_cfg(void) #endif /* CONFIG_PPC_MPC834x */ #ifdef CONFIG_PPC_MPC831x -int mpc831x_usb_cfg(void) +int __init mpc831x_usb_cfg(void) { u32 temp; void __iomem *immap, *usb_regs; @@ -209,7 +209,7 @@ out: #endif /* CONFIG_PPC_MPC831x */ #ifdef CONFIG_PPC_MPC837x -int mpc837x_usb_cfg(void) +int __init mpc837x_usb_cfg(void) { void __iomem *immap; struct device_node *np = NULL; diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c index 8d9a2503dd0f..58a398c89e97 100644 --- a/arch/powerpc/platforms/85xx/c293pcie.c +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -19,7 +19,7 @@ #include "mpc85xx.h" -void __init c293_pcie_pic_init(void) +static 
void __init c293_pcie_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 83a0f7a1f0de..743c65e4d8e4 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -78,7 +78,7 @@ void __init ge_imp3a_pic_init(void) of_node_put(cascade_node); } -static void ge_imp3a_pci_assign_primary(void) +static void __init ge_imp3a_pci_assign_primary(void) { #ifdef CONFIG_PCI struct device_node *np; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 172d2b7cfeb7..5bd487030256 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -282,7 +282,7 @@ machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach); #endif /* CONFIG_PPC_I8259 */ -static void mpc85xx_cds_pci_assign_primary(void) +static void __init mpc85xx_cds_pci_assign_primary(void) { #ifdef CONFIG_PCI struct device_node *np; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 4a8af80011a6..f7ac92a8ae97 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -15,6 +15,8 @@ #include <asm/io.h> #include <asm/fsl_pm.h> +#include "smp.h" + static struct ccsr_guts __iomem *guts; #ifdef CONFIG_FSL_PMC diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index d7081e9af65c..a1c6a7827c8f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -366,7 +366,7 @@ struct smp_ops_t smp_85xx_ops = { #ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); -void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { local_irq_disable(); @@ -384,7 +384,7 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } #else -void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { int cpu = smp_processor_id(); int sibling = cpu_last_thread_sibling(cpu); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 199a137c0ddb..3768c86b9629 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -271,7 +271,7 @@ static const struct irq_domain_ops socrates_fpga_pic_host_ops = { .xlate = socrates_fpga_pic_host_xlate, }; -void socrates_fpga_pic_init(struct device_node *pic) +void __init socrates_fpga_pic_init(struct device_node *pic) { unsigned long flags; int i; diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h index c592b8bc94dd..c50b23794a06 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h @@ -6,6 +6,6 @@ #ifndef SOCRATES_FPGA_PIC_H #define SOCRATES_FPGA_PIC_H -void socrates_fpga_pic_init(struct device_node *pic); +void __init socrates_fpga_pic_init(struct device_node *pic); #endif diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index d54e1ae56997..397e158c1edb 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -45,7 +45,7 @@ void __init 
xes_mpc85xx_pic_init(void) mpic_init(mpic); } -static void xes_mpc85xx_configure_l2(void __iomem *l2_base) +static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base) { volatile uint32_t ctl, tmp; @@ -72,7 +72,7 @@ static void xes_mpc85xx_configure_l2(void __iomem *l2_base) asm volatile("msync; isync"); } -static void xes_mpc85xx_fixups(void) +static void __init xes_mpc85xx_fixups(void) { struct device_node *np; int err; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e02d29a9d12f..d41dad227de8 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -40,9 +40,9 @@ config EPAPR_PARAVIRT In case of doubt, say Y -config PPC_NATIVE +config PPC_HASH_MMU_NATIVE bool - depends on PPC_BOOK3S_32 || PPC64 + depends on PPC_BOOK3S help Support for running natively on the hardware, i.e. without a hypervisor. This option is not user-selectable but should diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a208997ade88..87bc1929ee5a 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -30,8 +30,6 @@ config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" imply PPC_FPU select PPC_HAVE_PMU_SUPPORT - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK config PPC_85xx @@ -42,8 +40,7 @@ config PPC_8xx bool "Freescale 8xx" select ARCH_SUPPORTS_HUGETLBFS select FSL_SOC - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP + select PPC_KUEP select HAVE_ARCH_VMAP_STACK select HUGETLBFS @@ -53,6 +50,7 @@ config 40x select PPC_UDBG_16550 select 4xx_SOC select HAVE_PCI + select PPC_KUEP if PPC_KUAP config 44x bool "AMCC 44x, 46x or 47x" @@ -61,7 +59,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT - select PPC_HAVE_KUEP + select PPC_KUEP endchoice @@ -105,9 +103,7 @@ config PPC_BOOK3S_64 select HAVE_MOVE_PMD select HAVE_MOVE_PUD select IRQ_WORK - select PPC_MM_SLICES - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP + select PPC_64S_HASH_MMU if !PPC_RADIX_MMU config PPC_BOOK3E_64 bool "Embedded processors" @@ -130,11 +126,13 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" depends on PPC64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU if PPC_BOOK3S_64 config GENERIC_CPU bool "Generic (POWER8 and above)" depends on PPC64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config GENERIC_CPU bool "Generic 32 bits powerpc" @@ -143,24 +141,29 @@ config GENERIC_CPU config CELL_CPU bool "Cell Broadband Engine" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER5_CPU bool "POWER5" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER6_CPU bool "POWER6" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config POWER9_CPU bool "POWER9" @@ -278,6 +281,11 @@ config BOOKE depends on E500 || 44x || PPC_BOOK3E default y +config BOOKE_OR_40x + bool + depends on BOOKE || 40x + default y + config FSL_BOOKE bool depends on E500 && PPC32 @@ -290,6 +298,7 @@ config PPC_FSL_BOOK3E select FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL + select PPC_KUEP default y if FSL_BOOKE config PTE_64BIT @@ -364,6 +373,22 @@ config SPE If in doubt, say Y here. 
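The bulk of the hunks above apply one mechanical change: functions that run only during boot are annotated __init so they land in .init.text and that memory is released once initialization finishes, with the matching header prototypes updated so modpost does not flag section mismatches. A minimal, self-contained sketch of the pattern follows; the names are illustrative only and do not come from this series.

#include <linux/init.h>
#include <linux/printk.h>

/* Runs only at boot: placed in .init.text and discarded afterwards. */
static int __init example_probe_quirks(void)
{
	pr_info("applying example platform quirks\n");
	return 0;
}

/* Data referenced only from __init code can likewise be tagged __initdata. */
static bool example_apply_quirks __initdata = true;

static int __init example_platform_init(void)
{
	if (example_apply_quirks)
		return example_probe_quirks();
	return 0;
}
arch_initcall(example_platform_init);

Because __init text is gone after boot, only callers that themselves run at init time may use such functions, which is why the series also converts the declarations in headers like mpc83xx.h, socrates_fpga_pic.h and pasemi.h rather than the .c files alone.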
+config PPC_64S_HASH_MMU + bool "Hash MMU Support" + depends on PPC_BOOK3S_64 + select PPC_MM_SLICES + default y + help + Enable support for the Power ISA Hash style MMU. This is implemented + by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The + OpenPOWER ISA does not mandate the hash MMU and some CPUs do not + implement it (e.g., Microwatt). + + Note that POWER9 PowerVM platforms only support the hash + MMU. From POWER10 radix is also supported by PowerVM. + + If you're unsure, say Y. + config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 @@ -375,7 +400,8 @@ config PPC_RADIX_MMU you can probably disable this. config PPC_RADIX_MMU_DEFAULT - bool "Default to using the Radix MMU when possible" + bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU + depends on PPC_BOOK3S_64 depends on PPC_RADIX_MMU default y help @@ -387,24 +413,16 @@ config PPC_RADIX_MMU_DEFAULT If you're unsure, say Y. -config PPC_HAVE_KUEP - bool - config PPC_KUEP - bool "Kernel Userspace Execution Prevention" - depends on PPC_HAVE_KUEP - default y + bool "Kernel Userspace Execution Prevention" if !40x + default y if !40x help Enable support for Kernel Userspace Execution Prevention (KUEP) If you're unsure, say Y. -config PPC_HAVE_KUAP - bool - config PPC_KUAP bool "Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP default y help Enable support for Kernel Userspace Access Protection (KUAP) @@ -413,7 +431,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32) + depends on PPC_KUAP help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index cb70c5f25bc6..34669b060f36 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config PPC_CELL + select PPC_64S_HASH_MMU if PPC64 bool config PPC_CELL_COMMON @@ -8,7 +9,7 @@ config PPC_CELL_COMMON select PPC_DCR_MMIO select PPC_INDIRECT_PIO select PPC_INDIRECT_MMIO - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select PPC_RTAS select IRQ_EDGE_EOI_HANDLER diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index c2a0678d85db..1c4c53bec66c 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -165,7 +165,7 @@ u32 cbe_node_to_cpu(int node) } EXPORT_SYMBOL_GPL(cbe_node_to_cpu); -static struct device_node *cbe_get_be_node(int cpu_id) +static struct device_node *__init cbe_get_be_node(int cpu_id) { struct device_node *np; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index fa08699aedeb..25e726bf0172 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -253,7 +253,7 @@ static irqreturn_t ioc_interrupt(int irq, void *data) return IRQ_HANDLED; } -static int cell_iommu_find_ioc(int nid, unsigned long *base) +static int __init cell_iommu_find_ioc(int nid, unsigned long *base) { struct device_node *np; struct resource r; @@ -293,7 +293,7 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base) return -ENODEV; } -static void cell_iommu_setup_stab(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu, unsigned long dbase, unsigned long dsize, unsigned long fbase, unsigned long fsize) { @@ -313,7 +313,7 @@ static 
void cell_iommu_setup_stab(struct cbe_iommu *iommu, memset(iommu->stab, 0, stab_size); } -static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, +static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu, unsigned long base, unsigned long size, unsigned long gap_base, unsigned long gap_size, unsigned long page_shift) { @@ -373,7 +373,7 @@ static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, return ptab; } -static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) +static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu) { int ret; unsigned long reg, xlate_base; @@ -413,7 +413,7 @@ static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); } -static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long base, unsigned long size) { cell_iommu_setup_stab(iommu, base, size, 0, 0); @@ -858,7 +858,7 @@ static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask) cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR; } -static void insert_16M_pte(unsigned long addr, unsigned long *ptab, +static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab, unsigned long base_pte) { unsigned long segment, offset; @@ -873,7 +873,7 @@ static void insert_16M_pte(unsigned long addr, unsigned long *ptab, ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); } -static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, struct device_node *np, unsigned long dbase, unsigned long dsize, unsigned long fbase, unsigned long fsize) { @@ -977,6 +977,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 5b9a7e9f144b..dff8d5e7ab82 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -78,6 +78,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index bc48234443b6..83cea9e7ee72 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -387,7 +387,7 @@ spu_irq_class_2(int irq, void *data) return stat ? 
IRQ_HANDLED : IRQ_NONE; } -static int spu_request_irqs(struct spu *spu) +static int __init spu_request_irqs(struct spu *spu) { int ret = 0; @@ -540,7 +540,7 @@ void spu_remove_dev_attr_group(struct attribute_group *attrs) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); -static int spu_create_dev(struct spu *spu) +static int __init spu_create_dev(struct spu *spu) { int ret; @@ -711,7 +711,7 @@ static void crash_kexec_stop_spus(void) } } -static void crash_register_spus(struct list_head *list) +static void __init crash_register_spus(struct list_head *list) { struct spu *spu; int ret; diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 8e9ef65240c3..ddf8742f09a3 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -186,7 +186,7 @@ err: return -EINVAL; } -static int spu_map_resource(struct spu *spu, int nr, +static int __init spu_map_resource(struct spu *spu, int nr, void __iomem** virt, unsigned long *phys) { struct device_node *np = spu->devnode; @@ -361,7 +361,7 @@ static void disable_spu_by_master_run(struct spu_context *ctx) static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; -static struct spu *spu_lookup_reg(int node, u32 reg) +static struct spu *__init spu_lookup_reg(int node, u32 reg) { struct spu *spu; const u32 *spu_reg; @@ -374,7 +374,7 @@ static struct spu *spu_lookup_reg(int node, u32 reg) return NULL; } -static void init_affinity_qs20_harcoded(void) +static void __init init_affinity_qs20_harcoded(void) { int node, i; struct spu *last_spu, *spu; @@ -396,7 +396,7 @@ static void init_affinity_qs20_harcoded(void) } } -static int of_has_vicinity(void) +static int __init of_has_vicinity(void) { struct device_node *dn; @@ -409,7 +409,7 @@ static int of_has_vicinity(void) return 0; } -static struct spu *devnode_spu(int cbe, struct device_node *dn) +static struct spu *__init devnode_spu(int cbe, struct device_node *dn) { struct spu *spu; @@ -419,7 +419,7 @@ static struct spu *devnode_spu(int cbe, struct device_node *dn) return NULL; } -static struct spu * +static struct spu * __init neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) { struct spu *spu; @@ -440,7 +440,7 @@ neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) return NULL; } -static void init_affinity_node(int cbe) +static void __init init_affinity_node(int cbe) { struct spu *spu, *last_spu; struct device_node *vic_dn, *last_spu_dn; @@ -494,7 +494,7 @@ static void init_affinity_node(int cbe) } } -static void init_affinity_fw(void) +static void __init init_affinity_fw(void) { int cbe; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index cb25acccd746..4c702192412f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -648,7 +648,7 @@ static void spufs_exit_isolated_loader(void) get_order(isolated_loader_size)); } -static void +static void __init spufs_init_isolated_loader(void) { struct device_node *dn; diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig index 9b5c5505718a..ff30ed579a39 100644 --- a/arch/powerpc/platforms/chrp/Kconfig +++ b/arch/powerpc/platforms/chrp/Kconfig @@ -11,6 +11,6 @@ config PPC_CHRP select RTAS_ERROR_LOGGING select PPC_MPC106 select PPC_UDBG_16550 - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select FORCE_PCI 
default y diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 485cf5ef73d4..5c4f1a9ca154 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -113,7 +113,7 @@ static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { static void __iomem *mv643xx_reg_base; -static int Enable_SRAM(void) +static int __init Enable_SRAM(void) { u32 ALong; diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index 4c6d703a4284..c54786f8461e 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -55,7 +55,7 @@ config MVME5100 select FORCE_PCI select PPC_INDIRECT_PCI select PPC_I8259 - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select PPC_UDBG_16550 help This option enables support for the Motorola (now Emerson) MVME5100 diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 15396333a90b..380b4285cce4 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -153,7 +153,7 @@ static void __hlwd_quiesce(void __iomem *io_base) out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff); } -static struct irq_domain *hlwd_pic_init(struct device_node *np) +static struct irq_domain *__init hlwd_pic_init(struct device_node *np) { struct irq_domain *irq_domain; struct resource res; @@ -197,7 +197,7 @@ unsigned int hlwd_pic_get_irq(void) * */ -void hlwd_pic_probe(void) +void __init hlwd_pic_probe(void) { struct irq_domain *host; struct device_node *np; @@ -214,6 +214,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h index f18eeeef0815..c2fa42e191dc 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h @@ -11,7 +11,7 @@ #define __HLWD_PIC_H extern unsigned int hlwd_pic_get_irq(void); -extern void hlwd_pic_probe(void); +void __init hlwd_pic_probe(void); extern void hlwd_quiesce(void); #endif diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 7a85b117f7a4..07e71ba3e846 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -50,7 +50,7 @@ static int holly_exclude_device(struct pci_controller *hose, u_char bus, return PCIBIOS_SUCCESSFUL; } -static void holly_remap_bridge(void) +static void __init holly_remap_bridge(void) { u32 lut_val, lut_addr; int i; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index ed45db70a781..5aea46566233 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -194,7 +194,7 @@ static int ug_udbg_getc_poll(void) /* * Retrieves and prepares the virtual address needed to access the hardware. */ -static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) +static void __iomem *__init ug_udbg_setup_exi_io_base(struct device_node *np) { void __iomem *exi_io_base = NULL; phys_addr_t paddr; @@ -212,7 +212,7 @@ static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) /* * Checks if a USB Gecko adapter is inserted in any memory card slot. 
*/ -static void __iomem *ug_udbg_probe(void __iomem *exi_io_base) +static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base) { int i; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index a802ef957d63..f60ade584bb2 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -69,7 +69,7 @@ static void __noreturn wii_spin(void) cpu_relax(); } -static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible) +static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible) { void __iomem *hw_regs = NULL; struct device_node *np; diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig index 86ae210bee9a..4c058cc57c90 100644 --- a/arch/powerpc/platforms/maple/Kconfig +++ b/arch/powerpc/platforms/maple/Kconfig @@ -9,7 +9,8 @@ config PPC_MAPLE select GENERIC_TBSYNC select PPC_UDBG_16550 select PPC_970_NAP - select PPC_NATIVE + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE select PPC_RTAS select MMIO_NVRAM select ATA_NONSTANDARD if ATA diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig index 8f6a81978461..5e320f49583a 100644 --- a/arch/powerpc/platforms/microwatt/Kconfig +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -5,7 +5,6 @@ config PPC_MICROWATT select PPC_XICS select PPC_ICS_NATIVE select PPC_ICP_NATIVE - select PPC_NATIVE select PPC_UDBG_16550 select ARCH_RANDOM help diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c index 3d8ee6eb7dad..7bc4d1cbfaf0 100644 --- a/arch/powerpc/platforms/microwatt/rng.c +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -14,7 +14,7 @@ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul -int microwatt_get_random_darn(unsigned long *v) +static int microwatt_get_random_darn(unsigned long *v) { unsigned long val; diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig index c52731a7773f..85ae18ddd911 100644 --- a/arch/powerpc/platforms/pasemi/Kconfig +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -5,7 +5,8 @@ config PPC_PASEMI select MPIC select FORCE_PCI select PPC_UDBG_16550 - select PPC_NATIVE + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE select MPIC_BROKEN_REGREAD help This option enables support for PA Semi's PWRficient line diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e8af72282682..ea1e41451408 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -130,7 +130,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -int mpic_pasemi_msi_init(struct mpic *mpic) +int __init mpic_pasemi_msi_init(struct mpic *mpic) { int rc; struct pci_controller *phb; diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 70b56048ed1b..3f277a200fd8 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -7,7 +7,7 @@ extern void pas_pci_init(void); extern void pas_pci_irq_fixup(struct pci_dev *dev); extern void pas_pci_dma_dev_setup(struct pci_dev *dev); -extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); extern void __init pasemi_map_registers(void); diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 8779b107d872..d4b922759d6e 100644 --- 
a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -287,7 +287,7 @@ void __init pas_pci_init(void) } } -void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 376797eb7894..f974bfe7fde1 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -212,7 +212,7 @@ static void sb600_8259_cascade(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -static void nemo_init_IRQ(struct mpic *mpic) +static void __init nemo_init_IRQ(struct mpic *mpic) { struct device_node *np; int gpio_virq; diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index b97bf12801eb..130707ec9f99 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -6,7 +6,8 @@ config PPC_PMAC select FORCE_PCI select PPC_INDIRECT_PCI if PPC32 select PPC_MPC106 if PPC32 - select PPC_NATIVE + select PPC_64S_HASH_MMU if PPC64 + select PPC_HASH_MMU_NATIVE select ZONE_DMA if PPC32 default y diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index ced225415486..b8ae56e9f414 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -197,7 +197,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 5c77b9a24c0e..e67c624f35a2 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1530,7 +1530,7 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) * This takes the second CPU off the bus on dual CPU machines * running UP */ -void g5_phy_disable_cpu1(void) +void __init g5_phy_disable_cpu1(void) { if (uninorth_maj == 3) UN_OUT(U3_API_PHY_CONFIG_1, 0); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index f77a59b5c2e1..df89d916236d 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -810,6 +811,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -933,6 +935,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 853ccc4480e2..de8fcb607290 100644 --- 
a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -258,7 +258,7 @@ static u32 core99_calc_adler(u8 *buffer) return (high << 16) | low; } -static u32 core99_check(u8* datas) +static u32 __init core99_check(u8 *datas) { struct core99_header* hdr99 = (struct core99_header*)datas; diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index f5422506d4b0..9c2947a3edd5 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -93,7 +93,7 @@ static struct pmf_handlers macio_gpio_handlers = { .delay = macio_do_delay, }; -static void macio_gpio_init_one(struct macio_chip *macio) +static void __init macio_gpio_init_one(struct macio_chip *macio) { struct device_node *gparent, *gp; @@ -265,7 +265,7 @@ static struct pmf_handlers macio_mmio_handlers = { .delay = macio_do_delay, }; -static void macio_mmio_init_one(struct macio_chip *macio) +static void __init macio_mmio_init_one(struct macio_chip *macio) { DBG("Installing MMIO functions for macio %pOF\n", macio->of_node); @@ -294,7 +294,7 @@ static struct pmf_handlers unin_mmio_handlers = { .delay = macio_do_delay, }; -static void uninorth_install_pfunc(void) +static void __init uninorth_install_pfunc(void) { struct device_node *np; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 4921bccf0376..bb0566633af5 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -18,6 +18,7 @@ #include <linux/interrupt.h> #include <linux/syscore_ops.h> #include <linux/adb.h> +#include <linux/minmax.h> #include <linux/pmu.h> #include <asm/sections.h> @@ -311,11 +312,8 @@ static void __init pmac_pic_probe_oldstyle(void) /* Check ordering of master & slave */ if (of_device_is_compatible(master, "gatwick")) { - struct device_node *tmp; BUG_ON(slave == NULL); - tmp = master; - master = slave; - slave = tmp; + swap(master, slave); } /* We found a slave */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 13e8a8a9841c..974d4b49867b 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -166,7 +166,7 @@ static void pmac_show_cpuinfo(struct seq_file *m) } #ifndef CONFIG_ADB_CUDA -int find_via_cuda(void) +int __init find_via_cuda(void) { struct device_node *dn = of_find_node_by_name(NULL, "via-cuda"); @@ -180,7 +180,7 @@ int find_via_cuda(void) #endif #ifndef CONFIG_ADB_PMU -int find_via_pmu(void) +int __init find_via_pmu(void) { struct device_node *dn = of_find_node_by_name(NULL, "via-pmu"); @@ -194,7 +194,7 @@ int find_via_pmu(void) #endif #ifndef CONFIG_PMAC_SMU -int smu_init(void) +int __init smu_init(void) { /* should check and warn if SMU is present */ return 0; diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 3256a316e884..da1efdc30d6c 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -186,7 +186,7 @@ static const struct irq_domain_ops psurge_host_ops = { .map = psurge_host_map, }; -static int psurge_secondary_ipi_init(void) +static int __init psurge_secondary_ipi_init(void) { int rc = -ENOMEM; @@ -875,7 +875,7 @@ static int smp_core99_cpu_online(unsigned int cpu) static void __init smp_core99_bringup_done(void) { - extern void g5_phy_disable_cpu1(void); + extern void __init g5_phy_disable_cpu1(void); /* Close i2c bus if it was used for tb sync */ if 
(pmac_tb_clock_chip_host) diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index f286bdfe8346..965827ac2e9c 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -62,7 +62,7 @@ static unsigned char scc_inittab[] = { 3, 0xc1, /* rx enable, 8 bits */ }; -void udbg_scc_init(int force_scc) +void __init udbg_scc_init(int force_scc) { const u32 *reg; unsigned long addr; diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 043eefbbdd28..161dfe024085 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -2,7 +2,7 @@ config PPC_POWERNV depends on PPC64 && PPC_BOOK3S bool "IBM PowerNV (Non-Virtualized) platform support" - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU select PPC_XICS select PPC_ICP_NATIVE select PPC_XIVE_NATIVE diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e3ffdc8e8567..9942289f379b 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -62,7 +62,7 @@ static bool deepest_stop_found; static unsigned long power7_offline_type; -static int pnv_save_sprs_for_deep_states(void) +static int __init pnv_save_sprs_for_deep_states(void) { int cpu; int rc; @@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); static void pnv_fastsleep_workaround_apply(void *info) { + int cpu = smp_processor_id(); int rc; int *err = info; + if (cpu_first_thread_sibling(cpu) != cpu) + return; + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, OPAL_CONFIG_IDLE_APPLY); if (rc) @@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_t primary_thread_mask; int err; u8 val; @@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, power7_fastsleep_workaround_exit = false; cpus_read_lock(); - primary_thread_mask = cpu_online_cores_map(); - on_each_cpu_mask(&primary_thread_mask, - pnv_fastsleep_workaround_apply, - &err, 1); + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); cpus_read_unlock(); if (err) { pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); @@ -306,8 +306,8 @@ struct p7_sprs { /* per thread SPRs that get lost in shallow states */ u64 amr; u64 iamr; - u64 amor; u64 uamor; + /* amor is restored to constant ~0 */ }; static unsigned long power7_idle_insn(unsigned long type) @@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type) if (cpu_has_feature(CPU_FTR_ARCH_207S)) { sprs.amr = mfspr(SPRN_AMR); sprs.iamr = mfspr(SPRN_IAMR); - sprs.amor = mfspr(SPRN_AMOR); sprs.uamor = mfspr(SPRN_UAMOR); } @@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type) */ mtspr(SPRN_AMR, sprs.amr); mtspr(SPRN_IAMR, sprs.iamr); - mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_UAMOR, sprs.uamor); } } @@ -492,12 +491,14 @@ subcore_woken: mtspr(SPRN_SPRG3, local_paca->sprg_vdso); +#ifdef CONFIG_PPC_64S_HASH_MMU /* * The SLB has to be restored here, but it sometimes still * contains entries, so the __ variant must be used to prevent * multi hits. 
*/ __slb_restore_bolted_realmode(); +#endif return srr1; } @@ -589,7 +590,6 @@ struct p9_sprs { u64 purr; u64 spurr; u64 dscr; - u64 wort; u64 ciabr; u64 mmcra; @@ -687,7 +687,6 @@ static unsigned long power9_idle_stop(unsigned long psscr) sprs.amr = mfspr(SPRN_AMR); sprs.iamr = mfspr(SPRN_IAMR); - sprs.amor = mfspr(SPRN_AMOR); sprs.uamor = mfspr(SPRN_UAMOR); srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ @@ -708,7 +707,7 @@ static unsigned long power9_idle_stop(unsigned long psscr) */ mtspr(SPRN_AMR, sprs.amr); mtspr(SPRN_IAMR, sprs.iamr); - mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_UAMOR, sprs.uamor); /* @@ -1124,7 +1123,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) * stop instruction */ -int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) { int err = 0; @@ -1318,7 +1317,7 @@ static void __init pnv_probe_idle_states(void) * which is the number of cpuidle states discovered through device-tree. */ -static int pnv_parse_cpuidle_dt(void) +static int __init pnv_parse_cpuidle_dt(void) { struct device_node *np; int nr_idle_states, i; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 5b9736bbc2aa..0331f1973f0e 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -89,7 +89,7 @@ static inline int is_opalcore_usable(void) return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0; } -static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name, +static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name, u32 type, void *data, size_t data_len) { @@ -108,7 +108,7 @@ static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name, return buf; } -static void fill_prstatus(struct elf_prstatus *prstatus, int pir, +static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, struct pt_regs *regs) { memset(prstatus, 0, sizeof(struct elf_prstatus)); @@ -134,7 +134,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, int pir, } } -static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf, +static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, u64 opal_boot_entry) { Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 717d1d30ade5..410ed5b9de29 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -208,11 +208,12 @@ static struct attribute *dump_default_attrs[] = { &ack_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(dump_default); static struct kobj_type dump_ktype = { .sysfs_ops = &dump_sysfs_ops, .release = &dump_release, - .default_attrs = dump_default_attrs, + .default_groups = dump_default_groups, }; static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5821b0fa8614..554fdd7f88b8 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -144,11 +144,12 @@ static struct attribute *elog_default_attrs[] = { &ack_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(elog_default); static struct kobj_type elog_ktype = { .sysfs_ops = &elog_sysfs_ops, .release = &elog_release, - .default_attrs = elog_default_attrs, + .default_groups = elog_default_groups, }; /* Maximum size of a single log on 
FSP is 16KB */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 9a360ced663b..c8ad057c7221 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -112,7 +112,7 @@ static void opal_fadump_update_config(struct fw_dump *fadump_conf, * This function is called in the capture kernel to get configuration details * from metadata setup by the first kernel. */ -static void opal_fadump_get_config(struct fw_dump *fadump_conf, +static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { unsigned long base, size, last_end, hole_size; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 05d3832019b9..3fea5da6d1b3 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void) static void disable_core_pmu_counters(void) { - cpumask_t cores_map; int cpu, rc; cpus_read_lock(); /* Disable the IMC Core functions */ - cores_map = cpu_online_cores_map(); - for_each_cpu(cpu, &cores_map) { + for_each_online_cpu(cpu) { + if (cpu_first_thread_sibling(cpu) != cpu) + continue; rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); if (rc) diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 1e5d51db40f8..5390c888db16 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index d3b6e135c18b..22d6efe17b0d 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -105,7 +105,7 @@ static struct bin_attribute opal_msglog_attr = { .read = opal_msglog_read }; -struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name) +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) { u64 mcaddr; struct memcons *mc; @@ -133,7 +133,7 @@ out_err: return NULL; } -u32 memcons_get_size(struct memcons *mc) +u32 __init memcons_get_size(struct memcons *mc) { return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size); } diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c index 2a3717fc24ea..db99ffcb7b82 100644 --- a/arch/powerpc/platforms/powernv/opal-power.c +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -53,7 +53,7 @@ static bool detect_epow(void) } /* Check for existing EPOW, DPO events */ -static bool poweroff_pending(void) +static bool __init poweroff_pending(void) { int rc; __be64 opal_dpo_timeout; diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index c16d44f6f1d1..64506b46e77b 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -129,7 +129,7 @@ out_token: return ret; } -static void powercap_add_attr(int handle, const char *name, +static void __init powercap_add_attr(int handle, const char *name, struct powercap_attr *attr) { attr->handle = handle; diff --git 
a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 44d7dacb33a2..a9bcf9217e64 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -18,7 +18,7 @@ #include <asm/firmware.h> #include <asm/machdep.h> -static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) { tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index f8ae1fb0c102..8fba7d25ae56 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -126,7 +126,7 @@ static void add_attr(int handle, struct sg_attr *attr, int index) attr->attr.store = ops_info[index].store; } -static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, +static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, u32 handle) { int i, j; @@ -144,7 +144,7 @@ static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, return sysfs_create_group(sg_kobj, &sg->sg); } -static int get_nr_attrs(const __be32 *ops, int len) +static int __init get_nr_attrs(const __be32 *ops, int len) { int i, j; int nr_attrs = 0; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index e9d18519e650..55a8fbfdb5b2 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -73,7 +73,7 @@ static struct task_struct *kopald_tsk; static struct opal_msg *opal_msg; static u32 opal_msg_size __ro_after_init; -void opal_configure_cores(void) +void __init opal_configure_cores(void) { u64 reinit_flags = 0; @@ -779,7 +779,7 @@ out: return !!recover_addr; } -static int opal_sysfs_init(void) +static int __init opal_sysfs_init(void) { opal_kobj = kobject_create_and_add("opal", firmware_kobj); if (!opal_kobj) { @@ -937,7 +937,7 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_pdev_init(const char *compatible) +static void __init opal_pdev_init(const char *compatible) { struct device_node *np; @@ -981,7 +981,7 @@ void opal_wake_poller(void) wake_up_process(kopald_tsk); } -static void opal_init_heartbeat(void) +static void __init opal_init_heartbeat(void) { /* Old firwmware, we assume the HVC heartbeat is sufficient */ if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8913c86009d9..b722ac902269 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2265,7 +2265,7 @@ static const struct irq_domain_ops pnv_irq_domain_ops = { .free = pnv_irq_domain_free, }; -static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) +static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) { struct pnv_phb *phb = hose->private_data; struct irq_domain *parent = irq_get_default_host(); @@ -2298,7 +2298,7 @@ static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int co return 0; } -static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) +static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb) { unsigned int count; const __be32 *prop = of_get_property(phb->hose->dn, diff --git 
a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 11df4e16a1cc..e297bf4abfcb 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -39,7 +39,7 @@ bool cpu_core_split_required(void); struct memcons; ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); -u32 memcons_get_size(struct memcons *mc); -struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +u32 __init memcons_get_size(struct memcons *mc); +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name); #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 72c25295c1c2..b4386714494a 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -80,7 +80,7 @@ static int powernv_get_random_darn(unsigned long *v) return 1; } -static int initialise_darn(void) +static int __init initialise_darn(void) { unsigned long val; int i; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ad56a54ac9c5..105d889abd51 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -40,7 +40,7 @@ #include "powernv.h" -static bool fw_feature_is(const char *state, const char *name, +static bool __init fw_feature_is(const char *state, const char *name, struct device_node *fw_features) { struct device_node *np; @@ -55,7 +55,7 @@ static bool fw_feature_is(const char *state, const char *name, return rc; } -static void init_fw_feat_flags(struct device_node *np) +static void __init init_fw_feat_flags(struct device_node *np) { if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -static void pnv_setup_security_mitigations(void) +static void __init pnv_setup_security_mitigations(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; @@ -123,10 +123,14 @@ static void pnv_setup_security_mitigations(void) } /* - * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry or after user access: they fix a P9 specific vulnerability. + * The issues addressed by the entry and uaccess flush don't affect P7 + * or P8, so on bare metal disable them explicitly in case firmware does + * not include the features to disable them. POWER9 and newer processors + * should have the appropriate firmware flags. 
*/ - if (!pvr_version_is(PVR_POWER9)) { + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) || + pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) || + pvr_version_is(PVR_POWER8)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); } @@ -207,6 +211,7 @@ static void __init pnv_init(void) #endif add_preferred_console("hvc", 0, NULL); +#ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) { size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; @@ -219,6 +224,7 @@ static void __init pnv_init(void) cpu_to_node(i)); } } +#endif } static void __init pnv_init_IRQ(void) diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c index cba4f8f5b8d7..6b298010fd84 100644 --- a/arch/powerpc/platforms/ps3/gelic_udbg.c +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -113,7 +113,7 @@ static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len) return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr); } -static void gelic_debug_init(void) +static void __init gelic_debug_init(void) { s64 result; u64 v2; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 9c44f335c0b9..5ce924611b94 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -41,7 +41,7 @@ enum { PAGE_SHIFT_16M = 24U, }; -static unsigned long make_page_sizes(unsigned long a, unsigned long b) +static unsigned long __init make_page_sizes(unsigned long a, unsigned long b) { return (a << 56) | (b << 48); } @@ -215,7 +215,7 @@ notrace void ps3_mm_vas_destroy(void) } } -static int ps3_mm_get_repository_highmem(struct mem_region *r) +static int __init ps3_mm_get_repository_highmem(struct mem_region *r) { int result; diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index e8530371aed6..cb844e0add2b 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -501,7 +501,7 @@ static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id, return -1; } -static int db_get_64(const struct os_area_db *db, +static int __init db_get_64(const struct os_area_db *db, const struct os_area_db_id *id, uint64_t *value) { struct db_iterator i; @@ -517,7 +517,7 @@ static int db_get_64(const struct os_area_db *db, return -1; } -static int db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) +static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) { return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff); } diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h index 07bd39ef71ff..6beecdb0d51f 100644 --- a/arch/powerpc/platforms/ps3/platform.h +++ b/arch/powerpc/platforms/ps3/platform.h @@ -35,7 +35,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq); /* smp */ -void smp_init_ps3(void); +void __init smp_init_ps3(void); #ifdef CONFIG_SMP void ps3_smp_cleanup_cpu(int cpu); #else @@ -134,9 +134,9 @@ struct ps3_repository_device { int ps3_repository_find_device(struct ps3_repository_device *repo); int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, u64 bus_id, u64 dev_id); -int ps3_repository_find_devices(enum ps3_bus_type bus_type, +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, int (*callback)(const struct ps3_repository_device *repo)); -int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, +int __init ps3_repository_find_bus(enum 
ps3_bus_type bus_type, unsigned int from, unsigned int *bus_index); int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, enum ps3_interrupt_type intr_type, unsigned int *interrupt_id); @@ -211,8 +211,8 @@ static inline int ps3_repository_delete_highmem_info(unsigned int region_index) int ps3_repository_read_num_be(unsigned int *num_be); int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id); int ps3_repository_read_be_id(u64 node_id, u64 *be_id); -int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); -int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); /* repository performance monitor info */ @@ -247,7 +247,7 @@ int ps3_repository_read_spu_resource_id(unsigned int res_index, /* repository vuart info */ -int ps3_repository_read_vuart_av_port(unsigned int *port); -int ps3_repository_read_vuart_sysmgr_port(unsigned int *port); +int __init ps3_repository_read_vuart_av_port(unsigned int *port); +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port); #endif diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 21712964e76f..205763061a2d 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -413,7 +413,7 @@ found_dev: return 0; } -int ps3_repository_find_devices(enum ps3_bus_type bus_type, +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, int (*callback)(const struct ps3_repository_device *repo)) { int result = 0; @@ -455,7 +455,7 @@ int ps3_repository_find_devices(enum ps3_bus_type bus_type, return result; } -int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, +int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, unsigned int *bus_index) { unsigned int i; @@ -908,7 +908,7 @@ int ps3_repository_read_boot_dat_size(unsigned int *size) return result; } -int ps3_repository_read_vuart_av_port(unsigned int *port) +int __init ps3_repository_read_vuart_av_port(unsigned int *port) { int result; u64 v1 = 0; @@ -923,7 +923,7 @@ int ps3_repository_read_vuart_av_port(unsigned int *port) return result; } -int ps3_repository_read_vuart_sysmgr_port(unsigned int *port) +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) { int result; u64 v1 = 0; @@ -1005,7 +1005,7 @@ int ps3_repository_read_be_id(u64 node_id, u64 *be_id) be_id, NULL); } -int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) { return read_node(PS3_LPAR_ID_PME, make_first_field("be", 0), @@ -1015,7 +1015,7 @@ int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) tb_freq, NULL); } -int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) { int result; u64 node_id; @@ -1178,7 +1178,7 @@ int ps3_repository_delete_highmem_info(unsigned int region_index) #if defined(DEBUG) -int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) +int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) { int result = 0; unsigned int res_index; @@ -1231,7 +1231,7 @@ int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) return result; } -static int dump_stor_dev_info(struct ps3_repository_device *repo) +static int 
__init dump_stor_dev_info(struct ps3_repository_device *repo) { int result = 0; unsigned int num_regions, region_index; @@ -1279,7 +1279,7 @@ out: return result; } -static int dump_device_info(struct ps3_repository_device *repo, +static int __init dump_device_info(struct ps3_repository_device *repo, unsigned int num_dev) { int result = 0; @@ -1323,7 +1323,7 @@ static int dump_device_info(struct ps3_repository_device *repo, return result; } -int ps3_repository_dump_bus_info(void) +int __init ps3_repository_dump_bus_info(void) { int result = 0; struct ps3_repository_device repo; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 93b1e73b3529..85295756005a 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -112,7 +112,7 @@ static struct smp_ops_t ps3_smp_ops = { .kick_cpu = smp_generic_kick_cpu, }; -void smp_init_ps3(void) +void __init smp_init_ps3(void) { DBG(" -> %s\n", __func__); smp_ops = &ps3_smp_ops; diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 0c252478e556..4a2520ec6d7f 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -137,7 +137,7 @@ u64 ps3_get_spe_id(void *arg) } EXPORT_SYMBOL_GPL(ps3_get_spe_id); -static unsigned long get_vas_id(void) +static unsigned long __init get_vas_id(void) { u64 id; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2e57391e0778..f7fd91d153a4 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -17,7 +17,6 @@ config PPC_PSERIES select PPC_RTAS_DAEMON select RTAS_ERROR_LOGGING select PPC_UDBG_16550 - select PPC_NATIVE select PPC_DOORBELL select HOTPLUG_CPU select ARCH_RANDOM @@ -61,10 +60,6 @@ config PSERIES_ENERGY Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint -config SCANLOG - tristate "Scanlog dump interface" - depends on RTAS_PROC && PPC_PSERIES - config IO_EVENT_IRQ bool "IO Event Interrupt support" depends on PPC_PSERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 41d8aee98da4..ee60b59024b4 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index be661e919c76..623dfe0d8e1c 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -8,7 +8,7 @@ #include "pseries.h" -void request_event_sources_irqs(struct device_node *np, +void __init request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name) { diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 5ab44600c8d3..b81fc846d99c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -864,12 +864,13 @@ static int __init pseries_cpu_hotplug_init(void) /* Processors can be added/removed only on LPAR */ if (firmware_has_feature(FW_FEATURE_LPAR)) { for_each_node(node) { - 
alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]); + if (!alloc_cpumask_var_node(&node_recorded_ids_map[node], + GFP_KERNEL, node)) + return -ENOMEM; /* Record ids of CPU added at boot time */ - cpumask_or(node_recorded_ids_map[node], - node_recorded_ids_map[node], - cpumask_of_node(node)); + cpumask_copy(node_recorded_ids_map[node], + cpumask_of_node(node)); } of_reconfig_notifier_register(&pseries_smp_nb); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8f998e55735b..4d991cf840d9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1654,7 +1654,7 @@ static struct notifier_block iommu_reconfig_nb = { }; /* These are called very early. */ -void iommu_init_early_pSeries(void) +void __init iommu_init_early_pSeries(void) { if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) return; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3df6bdfea475..f8899d506ea4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); +#ifdef CONFIG_PPC_64S_HASH_MMU /* * H_BLOCK_REMOVE supported block size for this page size in segment who's base * page size is that page size. @@ -66,6 +67,7 @@ EXPORT_SYMBOL(plpar_hcall_norets); * page size. */ static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init; +#endif /* * Due to the involved complexity, and that the current hypervisor is only @@ -689,7 +691,7 @@ void vpa_init(int cpu) return; } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. @@ -702,7 +704,7 @@ void vpa_init(int cpu) "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ /* * Register dispatch trace log, if one has been allocated. 
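The hunks above (pnv_init, the hblkrm_size table, vpa_init) all gate hash-MMU-only code behind CONFIG_PPC_64S_HASH_MMU so a radix-only Book3S kernel can be built. Where a caller should keep compiling with the option disabled, the usual companion is an empty static inline stub in the header; the pseries.h hunk further down does exactly that for pseries_lpar_read_hblkrm_characteristics(). A minimal sketch of the pattern, with hypothetical names (CONFIG_EXAMPLE_HASH, example_hash_setup) that are not symbols from this patch:

/* example.h -- sketch only; CONFIG_EXAMPLE_HASH and example_hash_setup
 * are hypothetical names, not taken from this series. */
#ifdef CONFIG_EXAMPLE_HASH
void example_hash_setup(void);
#else
static inline void example_hash_setup(void) { }
#endif

/* caller.c -- always compiles; the call collapses to nothing when
 * CONFIG_EXAMPLE_HASH=n, so the caller needs no #ifdef of its own. */
void example_platform_init(void)
{
	example_hash_setup();
}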
@@ -712,6 +714,36 @@ void vpa_init(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 +static int __init pseries_lpar_register_process_table(unsigned long base, + unsigned long page_size, unsigned long table_size) +{ + long rc; + unsigned long flags = 0; + + if (table_size) + flags |= PROC_TABLE_NEW; + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else + flags |= PROC_TABLE_HPT_SLB; + for (;;) { + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, + page_size, table_size); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc != H_SUCCESS) { + pr_err("Failed to register process table (rc=%ld)\n", rc); + BUG(); + } + return rc; +} + +#ifdef CONFIG_PPC_64S_HASH_MMU + static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, @@ -1680,34 +1712,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) return 0; } -static int pseries_lpar_register_process_table(unsigned long base, - unsigned long page_size, unsigned long table_size) -{ - long rc; - unsigned long flags = 0; - - if (table_size) - flags |= PROC_TABLE_NEW; - if (radix_enabled()) { - flags |= PROC_TABLE_RADIX; - if (mmu_has_feature(MMU_FTR_GTSE)) - flags |= PROC_TABLE_GTSE; - } else - flags |= PROC_TABLE_HPT_SLB; - for (;;) { - rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, - page_size, table_size); - if (!H_IS_LONG_BUSY(rc)) - break; - mdelay(get_longbusy_msecs(rc)); - } - if (rc != H_SUCCESS) { - pr_err("Failed to register process table (rc=%ld)\n", rc); - BUG(); - } - return rc; -} - void __init hpte_init_pseries(void) { mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; @@ -1730,9 +1734,10 @@ void __init hpte_init_pseries(void) if (cpu_has_feature(CPU_FTR_ARCH_300)) pseries_lpar_register_process_table(0, 0, 0); } +#endif /* CONFIG_PPC_64S_HASH_MMU */ #ifdef CONFIG_PPC_RADIX_MMU -void radix_init_pseries(void) +void __init radix_init_pseries(void) { pr_info("Using radix MMU under hypervisor\n"); @@ -1932,7 +1937,8 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) return rc; } -static unsigned long vsid_unscramble(unsigned long vsid, int ssize) +#ifdef CONFIG_PPC_64S_HASH_MMU +static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize) { unsigned long protovsid; unsigned long va_bits = VA_BITS; @@ -1992,6 +1998,7 @@ static int __init reserve_vrma_context_id(void) return 0; } machine_device_initcall(pseries, reserve_vrma_context_id); +#endif #ifdef CONFIG_DEBUG_FS /* debugfs file interface for vpa data */ diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f71eac74ea92..c7940fcfc911 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -531,8 +531,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc(get_lppaca())); -#ifdef CONFIG_PPC_BOOK3S_64 - seq_printf(m, "slb_size=%d\n", mmu_slb_size); +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!radix_enabled()) + seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); maxmem_data(m); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 210a37a065fb..85033f392c78 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -451,11 +451,15 @@ static void prod_others(void) static u16 
clamp_slb_size(void) { +#ifdef CONFIG_PPC_64S_HASH_MMU u16 prev = mmu_slb_size; slb_set_size(SLB_MIN_SIZE); return prev; +#else + return 0; +#endif } static int do_suspend(void) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 3544778e06d0..56c9ef9052e9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -11,7 +11,7 @@ struct device_node; -extern void request_event_sources_irqs(struct device_node *np, +void __init request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); #include <linux/of.h> @@ -113,6 +113,11 @@ int dlpar_workqueue_init(void); extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); + +#ifdef CONFIG_PPC_64S_HASH_MMU void pseries_lpar_read_hblkrm_characteristics(void); +#else +static inline void pseries_lpar_read_hblkrm_characteristics(void) { } +#endif #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 56092dccfdb8..74c9b1b5bc66 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,6 +526,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) disposition = RTAS_DISP_FULLY_RECOVERED; break; case MC_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. @@ -538,6 +539,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) slb_save_contents(local_paca->mce_faulty_slbs); flush_and_reload_slb(); disposition = RTAS_DISP_FULLY_RECOVERED; +#endif break; default: break; diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index f8f73b47b107..35f9cb602c30 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -39,7 +39,7 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, * This function is called in the capture kernel to get configuration details * setup in the first kernel and passed to the f/w. */ -static void rtas_fadump_get_config(struct fw_dump *fadump_conf, +static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { fadump_conf->boot_mem_addr[0] = @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; @@ -272,7 +272,7 @@ static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val regs->dsisr = (unsigned long)reg_val; } -static struct rtas_fadump_reg_entry* +static struct rtas_fadump_reg_entry* __init rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c deleted file mode 100644 index 2879c4f0ceb7..000000000000 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * c 2001 PPC 64 Team, IBM Corp - * - * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> - * - * When ppc64 hardware fails the service processor dumps internal state - * of the system. After a reboot the operating system can access a dump - * of this data using this driver. 
A dump exists if the device-tree - * /chosen/ibm,scan-log-data property exists. - * - * This driver exports /proc/powerpc/scan-log-dump which can be read. - * The driver supports only sequential reads. - * - * The driver looks at a write to the driver for the single word "reset". - * If given, the driver will reset the scanlog so the platform can free it. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <asm/rtas.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.0" -#define MODULE_NAME "scanlog" - -/* Status returns from ibm,scan-log-dump */ -#define SCANLOG_COMPLETE 0 -#define SCANLOG_HWERROR -1 -#define SCANLOG_CONTINUE 1 - - -static unsigned int ibm_scan_log_dump; /* RTAS token */ -static unsigned int *scanlog_buffer; /* The data buffer */ - -static ssize_t scanlog_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned int *data = scanlog_buffer; - int status; - unsigned long len, off; - unsigned int wait_time; - - if (count > RTAS_DATA_BUF_SIZE) - count = RTAS_DATA_BUF_SIZE; - - if (count < 1024) { - /* This is the min supported by this RTAS call. Rather - * than do all the buffering we insist the user code handle - * larger reads. As long as cp works... :) - */ - printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); - return -EINVAL; - } - - if (!access_ok(buf, count)) - return -EFAULT; - - for (;;) { - wait_time = 500; /* default wait if no data */ - spin_lock(&rtas_data_buf_lock); - memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, - (u32) __pa(rtas_data_buf), (u32) count); - memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); - spin_unlock(&rtas_data_buf_lock); - - pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ - "data[2]=%x\n", status, data[0], data[1], data[2]); - switch (status) { - case SCANLOG_COMPLETE: - pr_debug("scanlog: hit eof\n"); - return 0; - case SCANLOG_HWERROR: - pr_debug("scanlog: hardware error reading data\n"); - return -EIO; - case SCANLOG_CONTINUE: - /* We may or may not have data yet */ - len = data[1]; - off = data[2]; - if (len > 0) { - if (copy_to_user(buf, ((char *)data)+off, len)) - return -EFAULT; - return len; - } - /* Break to sleep default time */ - break; - default: - /* Assume extended busy */ - wait_time = rtas_busy_delay_time(status); - if (!wait_time) { - printk(KERN_ERR "scanlog: unknown error " \ - "from rtas: %d\n", status); - return -EIO; - } - } - /* Apparently no data yet. Wait and try again. 
*/ - msleep_interruptible(wait_time); - } - /*NOTREACHED*/ -} - -static ssize_t scanlog_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) -{ - char stkbuf[20]; - int status; - - if (count > 19) count = 19; - if (copy_from_user (stkbuf, buf, count)) { - return -EFAULT; - } - stkbuf[count] = 0; - - if (buf) { - if (strncmp(stkbuf, "reset", 5) == 0) { - pr_debug("scanlog: reset scanlog\n"); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - pr_debug("scanlog: rtas returns %d\n", status); - } - } - return count; -} - -static int scanlog_open(struct inode * inode, struct file * file) -{ - unsigned int *data = scanlog_buffer; - - if (data[0] != 0) { - /* This imperfect test stops a second copy of the - * data (or a reset while data is being copied) - */ - return -EBUSY; - } - - data[0] = 0; /* re-init so we restart the scan */ - - return 0; -} - -static int scanlog_release(struct inode * inode, struct file * file) -{ - unsigned int *data = scanlog_buffer; - - data[0] = 0; - return 0; -} - -static const struct proc_ops scanlog_proc_ops = { - .proc_read = scanlog_read, - .proc_write = scanlog_write, - .proc_open = scanlog_open, - .proc_release = scanlog_release, - .proc_lseek = noop_llseek, -}; - -static int __init scanlog_init(void) -{ - struct proc_dir_entry *ent; - int err = -ENOMEM; - - ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) - return -ENODEV; - - /* Ideally we could allocate a buffer < 4G */ - scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!scanlog_buffer) - goto err; - - ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, - &scanlog_proc_ops); - if (!ent) - goto err; - return 0; -err: - kfree(scanlog_buffer); - return err; -} - -static void __exit scanlog_cleanup(void) -{ - remove_proc_entry("powerpc/rtas/scan-log-dump", NULL); - kfree(scanlog_buffer); -} - -module_init(scanlog_init); -module_exit(scanlog_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8a62af5b9c24..83a04d967a59 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -112,7 +112,7 @@ static void __init fwnmi_init(void) u8 *mce_data_buf; unsigned int i; int nr_cpus = num_possible_cpus(); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct slb_entry *slb_ptr; size_t size; #endif @@ -152,7 +152,7 @@ static void __init fwnmi_init(void) (RTAS_ERROR_LOG_MAX * i); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) { /* Allocate per cpu area to save old slb contents during MCE */ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; @@ -447,7 +447,7 @@ void pseries_big_endian_exceptions(void) panic("Could not enable big endian exceptions"); } -void pseries_little_endian_exceptions(void) +void __init pseries_little_endian_exceptions(void) { long rc; @@ -801,7 +801,9 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_security_mitigations(); +#ifdef CONFIG_PPC_64S_HASH_MMU pseries_lpar_read_hblkrm_characteristics(); +#endif /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); @@ -905,7 +907,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. 
(Stolen from parse_system_parameter_string) */ -static void pSeries_cmo_feature_init(void) +static void __init pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b043e3936d21..d243ddc58827 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -151,8 +151,15 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) if (rc == H_SUCCESS) return 0; - pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", - hcall, rc, query_type, result); + /* H_FUNCTION means HV does not support VAS so don't print an error */ + if (rc != H_FUNCTION) { + pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", + (hcall == H_QUERY_VAS_CAPABILITIES) ? + "H_QUERY_VAS_CAPABILITIES" : + "H_QUERY_NX_CAPABILITIES", + rc, query_type, result); + } + return -EIO; } EXPORT_SYMBOL_GPL(h_query_vas_capabilities); @@ -482,7 +489,7 @@ EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); * Get the specific capabilities based on the feature type. * Right now supports GZIP default and GZIP QoS capabilities. */ -static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, +static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, struct hv_vas_cop_feat_caps *hv_caps) { struct vas_cop_feat_caps *caps; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index feafcb582e1b..c9f9be4ea26a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1061,7 +1061,7 @@ static struct attribute *vio_bus_attrs[] = { }; ATTRIBUTE_GROUPS(vio_bus); -static void vio_cmo_sysfs_init(void) +static void __init vio_cmo_sysfs_init(void) { vio_bus_type.dev_groups = vio_cmo_dev_groups; vio_bus_type.bus_groups = vio_bus_groups; @@ -1073,7 +1073,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } static void vio_cmo_bus_remove(struct vio_dev *viodev) {} static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} static void vio_cmo_bus_init(void) {} -static void vio_cmo_sysfs_init(void) { } +static void __init vio_cmo_sysfs_init(void) { } #endif /* CONFIG_PPC_SMLPAR */ EXPORT_SYMBOL(vio_cmo_entitlement_update); EXPORT_SYMBOL(vio_cmo_set_dev_desired); @@ -1479,7 +1479,7 @@ EXPORT_SYMBOL(vio_register_device_node); * Starting from the root node provide, register the device node for * each child beneath the root. 
*/ -static void vio_bus_scan_register_devices(char *root_name) +static void __init vio_bus_scan_register_devices(char *root_name) { struct device_node *node_root, *node_child; diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 68538b8329f7..3f130312b6e9 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -135,7 +135,7 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) } EXPORT_SYMBOL(__cpm2_setbrg); -int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) +int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) { int ret = 0; int shift; @@ -265,7 +265,7 @@ int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) return ret; } -int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) +int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) { int ret = 0; int shift; @@ -326,7 +326,7 @@ struct cpm2_ioports { u32 res[3]; }; -void cpm2_set_pin(int port, int pin, int flags) +void __init cpm2_set_pin(int port, int pin, int flags) { struct cpm2_ioports __iomem *iop = (struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 1d33b7a5ea83..be6b99b1b352 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -226,7 +226,7 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) dart_cache_sync(orig_dp, orig_npages); } -static void allocate_dart(void) +static void __init allocate_dart(void) { unsigned long tmp; diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 9a98bb212922..df06bb6b838f 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -58,7 +58,7 @@ static struct irq_chip fsl_mpic_err_chip = { .irq_unmask = fsl_mpic_unmask_err, }; -int mpic_setup_error_int(struct mpic *mpic, int intvec) +int __init mpic_setup_error_int(struct mpic *mpic, int intvec) { int i; @@ -121,7 +121,7 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) return IRQ_HANDLED; } -void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) +void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) { unsigned int virq; int ret; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index b8f76f3fd994..674f047b7820 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1106,7 +1106,7 @@ static const struct of_device_id pci_ids[] = { struct device_node *fsl_pci_primary; -void fsl_pci_assign_primary(void) +void __init fsl_pci_assign_primary(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index 1d7a41205695..cdbde2e0c96e 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -120,7 +120,7 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose); extern struct device_node *fsl_pci_primary; #ifdef CONFIG_PCI -void fsl_pci_assign_primary(void); +void __init fsl_pci_assign_primary(void); #else static inline void fsl_pci_assign_primary(void) {} #endif diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index dc1a151c63d7..3b1ae98e3ce9 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -208,7 +208,7 @@ static const struct irq_domain_ops i8259_host_ops = { .xlate = i8259_host_xlate, }; -struct irq_domain *i8259_get_host(void) +struct irq_domain *__init i8259_get_host(void) { return i8259_host; } 
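Many of the hunks in this stretch (PS3 repository helpers, pseries VIO/fadump setup, the sysdev interrupt controllers) only add __init to functions that run once during boot. __init places the function's text in the .init.text section, which the kernel frees when boot completes, and lets the modpost section-mismatch checks warn if resident code still references it. A short, self-contained sketch of the idea, using hypothetical names (example_probe_hw, example_driver_setup) rather than anything from this patch:

#include <linux/init.h>
#include <linux/printk.h>

/* Hypothetical boot-only helper: its text is discarded after boot. */
static int __init example_probe_hw(void)
{
	pr_info("example: probing boot-time hardware\n");
	return 0;
}

/* Calling an __init function from another __init function is fine;
 * calling it after init memory has been freed would be a bug, which
 * the section-mismatch checks help catch at build time. */
static int __init example_driver_setup(void)
{
	return example_probe_hw();
}
early_initcall(example_driver_setup);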
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 7638a50a7c38..3f10c9fc3b68 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -767,7 +767,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) return ipic; } -void ipic_set_default_priority(void) +void __init ipic_set_default_priority(void) { ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT); ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 995fb2ada507..d5cb48b61bbd 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1323,8 +1323,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, psrc = of_get_property(mpic->node, "protected-sources", &psize); if (psrc) { /* Allocate a bitmap with one bit per interrupt */ - unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1); - mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL); + mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL); BUG_ON(mpic->protected == NULL); for (i = 0; i < psize/sizeof(u32); i++) { if (psrc[i] > intvec_top) @@ -1840,7 +1839,7 @@ unsigned int mpic_get_mcirq(void) } #ifdef CONFIG_SMP -void mpic_request_ipis(void) +void __init mpic_request_ipis(void) { struct mpic *mpic = mpic_primary; int i; diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 73a31a429d46..bb460ff57a06 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -8,8 +8,8 @@ #ifdef CONFIG_PCI_MSI extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq); -extern int mpic_msi_init_allocator(struct mpic *mpic); -extern int mpic_u3msi_init(struct mpic *mpic); +int __init mpic_msi_init_allocator(struct mpic *mpic); +int __init mpic_u3msi_init(struct mpic *mpic); #else static inline void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) @@ -24,7 +24,7 @@ static inline int mpic_u3msi_init(struct mpic *mpic) #endif #if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI) -int mpic_pasemi_msi_init(struct mpic *mpic); +int __init mpic_pasemi_msi_init(struct mpic *mpic); #else static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; } #endif @@ -37,8 +37,8 @@ extern void mpic_reset_core(int cpu); #ifdef CONFIG_FSL_SOC extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw); -extern void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); -extern int mpic_setup_error_int(struct mpic *mpic, int intvec); +void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); +int __init mpic_setup_error_int(struct mpic *mpic, int intvec); #else static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw) { diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c index 4695c04320ae..f412d6ad0b66 100644 --- a/arch/powerpc/sysdev/mpic_msi.c +++ b/arch/powerpc/sysdev/mpic_msi.c @@ -24,7 +24,7 @@ void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) } #ifdef CONFIG_MPIC_U3_HT_IRQS -static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) +static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) { irq_hw_number_t hwirq; const struct irq_domain_ops *ops = mpic->irqhost->ops; @@ -68,13 +68,13 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) return 0; } #else -static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) +static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) 
{ return -1; } #endif -int mpic_msi_init_allocator(struct mpic *mpic) +int __init mpic_msi_init_allocator(struct mpic *mpic) { int rc; diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index a42a20280035..444e9ce42d0a 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -384,7 +384,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, } EXPORT_SYMBOL(mpic_request_timer); -static int timer_group_get_freq(struct device_node *np, +static int __init timer_group_get_freq(struct device_node *np, struct timer_group_priv *priv) { u32 div; @@ -411,7 +411,7 @@ static int timer_group_get_freq(struct device_node *np, return 0; } -static int timer_group_get_irq(struct device_node *np, +static int __init timer_group_get_irq(struct device_node *np, struct timer_group_priv *priv) { const u32 all_timer[] = { 0, TIMERS_PER_GROUP }; @@ -459,7 +459,7 @@ static int timer_group_get_irq(struct device_node *np, return 0; } -static void timer_group_init(struct device_node *np) +static void __init timer_group_init(struct device_node *np) { struct timer_group_priv *priv; unsigned int i = 0; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index fa53b6d85ef9..3f4841dfefb5 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -169,7 +169,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -int mpic_u3msi_init(struct mpic *mpic) +int __init mpic_u3msi_init(struct mpic *mpic) { int rc; struct pci_controller *phb; diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index 4c4a6efd5e5f..9e13fb35ed5c 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -51,13 +51,12 @@ phys_addr_t get_csrbase(void) } return tsi108_csr_base; } +EXPORT_SYMBOL(get_csrbase); u32 get_vir_csrbase(void) { return (u32) (ioremap(get_csrbase(), 0x10000)); } - -EXPORT_SYMBOL(get_csrbase); EXPORT_SYMBOL(get_vir_csrbase); static int __init tsi108_eth_of_init(void) diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 042bb38fa5c2..1070220f15d5 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -257,7 +257,7 @@ static void tsi108_pci_int_unmask(u_int irq) mb(); } -static void init_pci_source(void) +static void __init init_pci_source(void) { tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, 0x0000ff00); diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index d38bbeed219b..5020044400dc 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -92,7 +92,7 @@ int memcons_getc(void) return c; } -void udbg_init_memcons(void) +void __init udbg_init_memcons(void) { udbg_putc = memcons_putc; udbg_getc = memcons_getc; diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index 6765d9e264a3..cf8db19a4f7d 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -162,7 +162,7 @@ static const struct icp_ops icp_hv_ops = { #endif }; -int icp_hv_init(void) +int __init icp_hv_init(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 675d708863d5..bda4c32582d9 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -184,7 +184,7 @@ static const struct icp_ops icp_opal_ops = { #endif }; -int 
icp_opal_init(void) +int __init icp_opal_init(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 244a727c6ba4..f3fb2a12124c 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -121,7 +121,7 @@ void xics_mask_unknown_vec(unsigned int vec) #ifdef CONFIG_SMP -static void xics_request_ipi(void) +static void __init xics_request_ipi(void) { unsigned int ipi; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 7b69299c2912..1ca5564bda9d 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -85,6 +85,16 @@ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); #define XIVE_INVALID_TARGET (-1) /* + * Global toggle to switch on/off StoreEOI + */ +static bool xive_store_eoi = true; + +static bool xive_is_store_eoi(struct xive_irq_data *xd) +{ + return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi; +} + +/* * Read the next entry in a queue, return its content if it's valid * or 0 if there is no new entry. * @@ -208,7 +218,7 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; - if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd)) offset |= XIVE_ESB_LD_ST_MO; if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) @@ -227,6 +237,21 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) out_be64(xd->eoi_mmio + offset, data); } +#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS) +static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size) +{ + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx", + xive_is_store_eoi(xd) ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-', + xd->trig_page, xd->eoi_page); +} +#endif + #ifdef CONFIG_XMON static notrace void xive_dump_eq(const char *name, struct xive_q *q) { @@ -252,11 +277,10 @@ notrace void xmon_xive_do_dump(int cpu) #ifdef CONFIG_SMP { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + char buffer[128]; - xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer); } #endif xive_dump_eq("EQ", &xc->queue[xive_irq_priority]); @@ -291,15 +315,11 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) d = xive_get_irq_data(hw_irq); if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); - - xmon_printf("flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); + char buffer[128]; + + xive_irq_data_dump(irq_data_get_irq_handler_data(d), + buffer, sizeof(buffer)); + xmon_printf("%s", buffer); } xmon_printf("\n"); @@ -385,7 +405,7 @@ static void xive_do_source_eoi(struct xive_irq_data *xd) xd->stale_p = false; /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { + if (xive_is_store_eoi(xd)) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); return; } @@ -451,6 +471,8 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd, { u64 val; + pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un"); + /* * If the interrupt had P set, it may be in a queue. * @@ -612,8 +634,8 @@ static unsigned int xive_irq_startup(struct irq_data *d) xd->saved_p = false; xd->stale_p = false; - pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); @@ -654,8 +676,7 @@ static void xive_irq_shutdown(struct irq_data *d) struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); if (WARN_ON(xd->target == XIVE_INVALID_TARGET)) return; @@ -679,7 +700,7 @@ static void xive_irq_unmask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, false); } @@ -688,7 +709,7 @@ static void xive_irq_mask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, true); } @@ -702,7 +723,7 @@ static int xive_irq_set_affinity(struct irq_data *d, u32 target, old_target; int rc = 0; - pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq); + pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? 
*/ if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) @@ -975,7 +996,7 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { - pr_debug("%s for HW %x\n", __func__, xd->hw_irq); + pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq); if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); @@ -1211,8 +1232,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) pr_err("Failed to map IPI CPU %d\n", cpu); return -EIO; } - pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu, - xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); + pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu, + xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); /* Unmask it */ xive_do_source_set_mask(&xc->ipi_data, false); @@ -1390,7 +1411,7 @@ static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (rc) return rc; - pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs); for (i = 0; i < nr_irqs; i++) { /* TODO: call xive_irq_domain_map() */ @@ -1504,7 +1525,7 @@ static void xive_setup_cpu(void) #ifdef CONFIG_SMP void xive_smp_setup_cpu(void) { - pr_devel("SMP setup CPU %d\n", smp_processor_id()); + pr_debug("SMP setup CPU %d\n", smp_processor_id()); /* This will have already been done on the boot CPU */ if (smp_processor_id() != boot_cpuid) @@ -1650,10 +1671,10 @@ bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, ppc_md.get_irq = xive_get_irq; __xive_enabled = true; - pr_devel("Initializing host..\n"); + pr_debug("Initializing host..\n"); xive_init_host(np); - pr_devel("Initializing boot CPU..\n"); + pr_debug("Initializing boot CPU..\n"); /* Allocate per-CPU data and queues */ xive_prepare_cpu(smp_processor_id()); @@ -1691,36 +1712,36 @@ static int __init xive_off(char *arg) } __setup("xive=off", xive_off); -static void xive_debug_show_cpu(struct seq_file *m, int cpu) +static int __init xive_store_eoi_cmdline(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strncmp(arg, "off", 3) == 0) { + pr_info("StoreEOI disabled on kernel command line\n"); + xive_store_eoi = false; + } + return 0; +} +__setup("xive.store-eoi=", xive_store_eoi_cmdline); + +#ifdef CONFIG_DEBUG_FS +static void xive_debug_show_ipi(struct seq_file *m, int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); - seq_printf(m, "CPU %d:", cpu); + seq_printf(m, "CPU %d: ", cpu); if (xc) { seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); #ifdef CONFIG_SMP { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + char buffer[128]; - seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer); } #endif - { - struct xive_q *q = &xc->queue[xive_irq_priority]; - u32 i0, i1, idx; - - if (q->qpage) { - idx = q->idx; - i0 = be32_to_cpup(q->qpage + idx); - idx = (idx + 1) & q->msk; - i1 = be32_to_cpup(q->qpage + idx); - seq_printf(m, "EQ idx=%d T=%d %08x %08x ...", - q->idx, q->toggle, i0, i1); - } - } } seq_puts(m, "\n"); } @@ -1732,8 +1753,7 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) u32 target; u8 prio; u32 lirq; - struct xive_irq_data *xd; - u64 val; + char buffer[128]; rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { @@ -1744,43 +1764,101 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); - xd = irq_data_get_irq_handler_data(d); - val = xive_esb_read(xd, XIVE_ESB_GET); - seq_printf(m, "flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer)); + seq_puts(m, buffer); seq_puts(m, "\n"); } -static int xive_core_debug_show(struct seq_file *m, void *private) +static int xive_irq_debug_show(struct seq_file *m, void *private) { unsigned int i; struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xive_debug_show_irq(m, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_irq_debug); + +static int xive_ipi_debug_show(struct seq_file *m, void *private) +{ int cpu; if (xive_ops->debug_show) xive_ops->debug_show(m, private); for_each_possible_cpu(cpu) - xive_debug_show_cpu(m, cpu); + xive_debug_show_ipi(m, cpu); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug); - for_each_irq_desc(i, desc) { - struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); +static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio) +{ + int i; - if (d) - xive_debug_show_irq(m, d); + seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle); + if (q->qpage) { + for (i = 0; i < q->msk + 1; i++) { + if (!(i % 8)) + seq_printf(m, "%05d ", i); + seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i), + (i + 1) % 8 ? 
" " : "\n"); + } } + seq_puts(m, "\n"); +} + +static int xive_eq_debug_show(struct seq_file *m, void *private) +{ + int cpu = (long)m->private; + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + if (xc) + xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority], + xive_irq_priority); return 0; } -DEFINE_SHOW_ATTRIBUTE(xive_core_debug); +DEFINE_SHOW_ATTRIBUTE(xive_eq_debug); + +static void xive_core_debugfs_create(void) +{ + struct dentry *xive_dir; + struct dentry *xive_eq_dir; + long cpu; + char name[16]; + + xive_dir = debugfs_create_dir("xive", arch_debugfs_dir); + if (IS_ERR(xive_dir)) + return; + + debugfs_create_file("ipis", 0400, xive_dir, + NULL, &xive_ipi_debug_fops); + debugfs_create_file("interrupts", 0400, xive_dir, + NULL, &xive_irq_debug_fops); + xive_eq_dir = debugfs_create_dir("eqs", xive_dir); + for_each_possible_cpu(cpu) { + snprintf(name, sizeof(name), "cpu%ld", cpu); + debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu, + &xive_eq_debug_fops); + } + debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi); + + if (xive_ops->debug_create) + xive_ops->debug_create(xive_dir); +} +#else +static inline void xive_core_debugfs_create(void) { } +#endif /* CONFIG_DEBUG_FS */ int xive_core_debug_init(void) { - if (xive_enabled()) - debugfs_create_file("xive", 0400, arch_debugfs_dir, - NULL, &xive_core_debug_fops); + if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS)) + xive_core_debugfs_create(); + return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 1aec282cd650..f940428ad13f 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -41,7 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; -static bool xive_has_save_restore; +bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -63,6 +63,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) opal_flags = be64_to_cpu(flags); if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; + if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2) + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; data->eoi_page = be64_to_cpu(eoi_page); @@ -459,6 +461,14 @@ void xive_native_sync_queue(u32 hw_irq) } EXPORT_SYMBOL_GPL(xive_native_sync_queue); +#ifdef CONFIG_DEBUG_FS +static int xive_native_debug_create(struct dentry *xive_dir) +{ + debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore); + return 0; +} +#endif + static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, @@ -476,10 +486,13 @@ static const struct xive_ops xive_native_ops = { .get_ipi = xive_native_get_ipi, .put_ipi = xive_native_put_ipi, #endif /* CONFIG_SMP */ +#ifdef CONFIG_DEBUG_FS + .debug_create = xive_native_debug_create, +#endif /* CONFIG_DEBUG_FS */ .name = "native", }; -static bool xive_parse_provisioning(struct device_node *np) +static bool __init xive_parse_provisioning(struct device_node *np) { int rc; @@ -519,7 +532,7 @@ static bool xive_parse_provisioning(struct device_node *np) return true; } -static void xive_native_setup_pools(void) +static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); diff --git 
a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f143b6f111ac..928f95004501 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -44,7 +44,7 @@ struct xive_irq_bitmap { static LIST_HEAD(xive_irq_bitmaps); -static int xive_irq_bitmap_add(int base, int count) +static int __init xive_irq_bitmap_add(int base, int count) { struct xive_irq_bitmap *xibm; @@ -173,7 +173,7 @@ static long plpar_int_get_source_info(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); + pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -182,8 +182,8 @@ static long plpar_int_get_source_info(unsigned long flags, *trig_page = retbuf[2]; *esb_shift = retbuf[3]; - pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", - retbuf[0], retbuf[1], retbuf[2], retbuf[3]); + pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n", + lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]); return 0; } @@ -200,8 +200,8 @@ static long plpar_int_set_source_config(unsigned long flags, long rc; - pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", - flags, lisn, target, prio, sw_irq); + pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n", + flags, lisn, target, prio, sw_irq); do { @@ -210,7 +210,7 @@ static long plpar_int_set_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", + pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n", lisn, target, prio, rc); return rc; } @@ -227,7 +227,7 @@ static long plpar_int_get_source_config(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn); + pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn); do { rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, @@ -235,7 +235,7 @@ static long plpar_int_get_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n", + pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -244,8 +244,8 @@ static long plpar_int_get_source_config(unsigned long flags, *prio = retbuf[1]; *sw_irq = retbuf[2]; - pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n", - retbuf[0], retbuf[1], retbuf[2]); + pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n", + retbuf[0], retbuf[1], retbuf[2]); return 0; } @@ -273,8 +273,8 @@ static long plpar_int_get_queue_info(unsigned long flags, *esn_page = retbuf[0]; *esn_size = retbuf[1]; - pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", - retbuf[0], retbuf[1]); + pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n", + target, priority, retbuf[0], retbuf[1]); return 0; } @@ -289,8 +289,8 @@ static long plpar_int_set_queue_config(unsigned long flags, { long rc; - pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", - flags, target, priority, qpage, qsize); + pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n", + flags, target, priority, qpage, qsize); do { rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, @@ -298,7 +298,7 @@ static long 
plpar_int_set_queue_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n", target, priority, qpage, rc); return rc; } @@ -315,7 +315,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn) } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); + pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc); return rc; } @@ -333,8 +333,8 @@ static long plpar_int_esb(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", - flags, lisn, offset, in_data); + pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n", + flags, lisn, offset, in_data); do { rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, @@ -342,7 +342,7 @@ static long plpar_int_esb(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", + pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n", lisn, offset, rc); return rc; } @@ -653,6 +653,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private) struct xive_irq_bitmap *xibm; char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { memset(buf, 0, PAGE_SIZE); bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); @@ -687,7 +690,7 @@ static const struct xive_ops xive_spapr_ops = { /* * get max priority from "/ibm,plat-res-int-priorities" */ -static bool xive_get_max_prio(u8 *max_prio) +static bool __init xive_get_max_prio(u8 *max_prio) { struct device_node *rootdn; const __be32 *reg; @@ -741,7 +744,7 @@ static bool xive_get_max_prio(u8 *max_prio) return true; } -static const u8 *get_vec5_feature(unsigned int index) +static const u8 *__init get_vec5_feature(unsigned int index) { unsigned long root, chosen; int size; diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 504e7edce358..fe6d95d54af9 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -58,6 +58,7 @@ struct xive_ops { void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif int (*debug_show)(struct seq_file *m, void *private); + int (*debug_create)(struct dentry *xive_dir); const char *name; }; @@ -72,5 +73,6 @@ static inline u32 xive_alloc_order(u32 queue_shift) } extern bool xive_cmdline_disabled; +extern bool xive_has_save_restore; #endif /* __XIVE_INTERNAL_H */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 8b28ff9d98d1..fd72753e8ad5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -125,7 +125,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */ static int cmds(struct pt_regs *); static int mread(unsigned long, void *, int); static int mwrite(unsigned long, void *, int); -static int mread_instr(unsigned long, struct ppc_inst *); +static int mread_instr(unsigned long, ppc_inst_t *); static int handle_fault(struct pt_regs *); static void byterev(unsigned char *, int); static void memex(void); @@ -908,7 +908,7 @@ static struct bpt *new_breakpoint(unsigned long a) static void insert_bpts(void) { int i; - struct ppc_inst instr, instr2; + ppc_inst_t instr, instr2; struct bpt *bp, *bp2; bp = bpts; @@ -988,7 +988,7 @@ static void remove_bpts(void) { int i; struct bpt *bp; - struct ppc_inst 
instr; + ppc_inst_t instr; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { @@ -1159,7 +1159,7 @@ cmds(struct pt_regs *excp) case 'P': show_tasks(); break; -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_64S_HASH_MMU) case 'u': dump_segments(); break; @@ -1204,7 +1204,7 @@ static int do_step(struct pt_regs *regs) */ static int do_step(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; int stepped; force_enable_xmon(); @@ -1459,7 +1459,7 @@ csum(void) */ static long check_bp_loc(unsigned long addr) { - struct ppc_inst instr; + ppc_inst_t instr; addr &= ~3; if (!is_kernel_addr(addr)) { @@ -2107,8 +2107,14 @@ static void dump_300_sprs(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) return; - printf("pidr = %.16lx tidr = %.16lx\n", - mfspr(SPRN_PID), mfspr(SPRN_TIDR)); + if (cpu_has_feature(CPU_FTR_P9_TIDR)) { + printf("pidr = %.16lx tidr = %.16lx\n", + mfspr(SPRN_PID), mfspr(SPRN_TIDR)); + } else { + printf("pidr = %.16lx\n", + mfspr(SPRN_PID)); + } + printf("psscr = %.16lx\n", hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); @@ -2300,7 +2306,7 @@ mwrite(unsigned long adrs, void *buf, int size) } static int -mread_instr(unsigned long adrs, struct ppc_inst *instr) +mread_instr(unsigned long adrs, ppc_inst_t *instr) { volatile int n; @@ -2608,7 +2614,7 @@ static void dump_tracing(void) static void dump_one_paca(int cpu) { struct paca_struct *p; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU int i = 0; #endif @@ -2650,6 +2656,7 @@ static void dump_one_paca(int cpu) DUMP(p, cpu_start, "%#-*x"); DUMP(p, kexec_state, "%#-*x"); #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!early_radix_enabled()) { for (i = 0; i < SLB_NUM_BOLTED; i++) { u64 esid, vsid; @@ -2677,6 +2684,7 @@ static void dump_one_paca(int cpu) 22, "slb_cache", i, p->slb_cache[i]); } } +#endif DUMP(p, rfi_flush_fallback_area, "%-*px"); #endif @@ -2809,12 +2817,12 @@ static void dump_all_xives(void) { int cpu; - if (num_possible_cpus() == 0) { + if (num_online_cpus() == 0) { printf("No possible cpus, use 'dx #' to dump individual cpus\n"); return; } - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) dump_one_xive(cpu); } @@ -3020,7 +3028,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - struct ppc_inst inst, last_inst = ppc_inst(0); + ppc_inst_t inst, last_inst = ppc_inst(0); dotted = 0; for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { @@ -3740,7 +3748,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, printf("%s", after); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU void dump_segments(void) { int i; @@ -4128,7 +4136,7 @@ struct spu_info { static struct spu_info spu_info[XMON_NUM_SPUS]; -void xmon_register_spus(struct list_head *list) +void __init xmon_register_spus(struct list_head *list) { struct spu *spu; diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h index 57e6fb03de48..377068f52edb 100644 --- a/arch/powerpc/xmon/xmon_bpts.h +++ b/arch/powerpc/xmon/xmon_bpts.h @@ -5,8 +5,8 @@ #define NBPTS 256 #ifndef __ASSEMBLY__ #include <asm/inst.h> -#define BPT_SIZE (sizeof(struct ppc_inst) * 2) -#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst)) +#define BPT_SIZE (sizeof(ppc_inst_t) * 2) +#define BPT_WORDS (BPT_SIZE / sizeof(ppc_inst_t)) extern unsigned int bpt_table[NBPTS * BPT_WORDS]; #endif /* __ASSEMBLY__ */ |
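The XIVE changes above add a global StoreEOI switch: it can be disabled at boot with xive.store-eoi=off and toggled later through the store-eoi debugfs bool, and xive_is_store_eoi() combines it with the per-interrupt XIVE_IRQ_FLAG_STORE_EOI flag. A minimal sketch of that command-line-plus-debugfs pattern, assuming hypothetical names (my_feature, my_feature_cmdline, my_feature_debugfs_init) and only the standard __setup()/debugfs kernel APIs:

#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/printk.h>
#include <linux/string.h>

static bool my_feature = true;

/* Parsed at boot: "my_feature=off" on the command line clears the flag. */
static int __init my_feature_cmdline(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (strncmp(arg, "off", 3) == 0) {
		pr_info("my_feature disabled on kernel command line\n");
		my_feature = false;
	}
	return 0;
}
__setup("my_feature=", my_feature_cmdline);

/* Exposes /sys/kernel/debug/my_feature as a read/write boolean. */
static int __init my_feature_debugfs_init(void)
{
	debugfs_create_bool("my_feature", 0600, NULL, &my_feature);
	return 0;
}
late_initcall(my_feature_debugfs_init);

Because the switch is a plain bool, checking it on the interrupt fast path stays cheap, which is why the patch can consult it in xive_esb_read() and xive_do_source_eoi() without any extra indirection.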