author     Linus Torvalds   2020-12-17 13:34:25 -0800
committer  Linus Torvalds   2020-12-17 13:34:25 -0800
commit     8a5be36b9303ae167468d4f5e1b3c090b9981396
tree       ddf1721677782484bab6369a87f13611eafb879a
parent     09c0796adf0c793462fda1d7c8c43324551405c7
parent     c1bea0a840ac75dca19bc6aa05575a33eb9fd058
Merge tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Switch to the generic C VDSO, as well as some cleanups of our VDSO
  setup/handling code (see the clock_gettime() sketch after this list).
- Support for KUAP (Kernel User Access Prevention) on systems using the
  hashed page table MMU, implemented with memory protection keys (see the
  pkey sketch after this list).
- Better handling of PowerVM SMT8 systems where all threads of a core do
  not share an L2, allowing the scheduler to make better scheduling
  decisions (see the cache-topology sketch after this list).
- Further improvements to our machine check handling.
- Show registers when unwinding interrupt frames during stack traces.
- Improvements to our pseries (PowerVM) partition migration code.
- Several series from Christophe refactoring and cleaning up various
  parts of the 32-bit code.
- Other smaller features, fixes & cleanups.
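As a rough userspace-side illustration of the VDSO item above (not code from this series; the actual powerpc changes are in the diff below), switching to the generic C VDSO is transparent to applications: an ordinary clock_gettime() call is simply serviced from the kernel-provided vDSO page instead of via a system call.

/* Plain POSIX usage; the vDSO only changes how the call is serviced. */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
		perror("clock_gettime");
		return 1;
	}
	printf("monotonic: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}

On a kernel whose vDSO exports the symbol, running this under strace typically shows no clock_gettime system call, because the fast path never enters the kernel.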
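The KUAP item builds on the hash MMU's memory protection keys inside the kernel; the mechanism itself lives in the book3s/64 kup.h and pkey changes in the diff below. As a hedged, userspace-level sketch of the protection-key primitive (generic Linux pkey_*() API, assuming glibc 2.27 or later and a kernel with pkey support; again not code from this series):

/*
 * Revoke and restore access to a mapping through a protection key,
 * without touching the page permissions themselves.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int pkey = pkey_alloc(0, 0);	/* new key, no restrictions yet */

	if (p == MAP_FAILED || pkey < 0) {
		perror("mmap/pkey_alloc");
		return 1;
	}

	pkey_mprotect(p, len, PROT_READ | PROT_WRITE, pkey);	/* tag the mapping */
	strcpy(p, "accessible");

	pkey_set(pkey, PKEY_DISABLE_ACCESS);
	/*
	 * Any load or store through p would now fault, even though the page
	 * permissions are unchanged - conceptually the same fence KUAP puts
	 * between the kernel and user memory.
	 */

	pkey_set(pkey, 0);		/* allow access again */
	printf("%s\n", p);
	pkey_free(pkey);
	munmap(p, len);
	return 0;
}

The kernel-side series does the analogous thing with a reserved kernel key (HASH_DEFAULT_KERNEL_KEY in the diff) and flips the AMR around user accesses.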
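The SMT8/L2 item concerns the scheduler's view of which CPUs share a cache. That view is exported through the standard sysfs cache-topology files; the sketch below (generic sysfs paths, nothing specific to this series) prints the level and sharing list for each cache index of CPU 0.

/* Walk /sys/devices/system/cpu/cpu0/cache/indexN and print each level's
 * sharing list - the information the SMT8/L2 scheduler changes are about. */
#include <stdio.h>
#include <string.h>

static void read_line(const char *path, char *buf, size_t len)
{
	FILE *f = fopen(path, "r");

	buf[0] = '\0';
	if (f) {
		if (fgets(buf, len, f))
			buf[strcspn(buf, "\n")] = '\0';
		fclose(f);
	}
}

int main(void)
{
	char path[128], level[16], shared[256];
	int idx;

	for (idx = 0; ; idx++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cache/index%d/level", idx);
		read_line(path, level, sizeof(level));
		if (!level[0])
			break;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cache/index%d/shared_cpu_list",
			 idx);
		read_line(path, shared, sizeof(shared));
		printf("index%d: L%s shared with CPUs %s\n", idx, level, shared);
	}
	return 0;
}

On an affected PowerVM SMT8 partition the L2 entries list only a subset of a core's eight threads, which is what the scheduler changes in this pull start taking into account.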
Thanks to: Alan Modra, Alexey Kardashevskiy, Andrew Donnellan, Aneesh
Kumar K.V, Ard Biesheuvel, Athira Rajeev, Balamuruhan S, Bill Wendling,
Cédric Le Goater, Christophe Leroy, Christophe Lombard, Colin Ian King,
Daniel Axtens, David Hildenbrand, Frederic Barrat, Ganesh Goudar,
Gautham R. Shenoy, Geert Uytterhoeven, Giuseppe Sacco, Greg Kurz,
Harish, Jan Kratochvil, Jordan Niethe, Kaixu Xia, Laurent Dufour,
Leonardo Bras, Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu
Desnoyers, Nathan Lynch, Nicholas Piggin, Oleg Nesterov, Oliver
O'Halloran, Oscar Salvador, Po-Hsu Lin, Qian Cai, Qinglang Miao, Randy
Dunlap, Ravi Bangoria, Sachin Sant, Sandipan Das, Sebastian Andrzej
Siewior, Segher Boessenkool, Srikar Dronamraju, Tyrel Datwyler, Uwe
Kleine-König, Vincent Stehlé, Youling Tang, and Zhang Xiaoxu.
* tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (304 commits)
powerpc/32s: Fix cleanup_cpu_mmu_context() compile bug
powerpc: Add config fragment for disabling -Werror
powerpc/configs: Add ppc64le_allnoconfig target
powerpc/powernv: Rate limit opal-elog read failure message
powerpc/pseries/memhotplug: Quieten some DLPAR operations
powerpc/ps3: use dma_mapping_error()
powerpc: force inlining of csum_partial() to avoid multiple csum_partial() with GCC10
powerpc/perf: Fix Threshold Event Counter Multiplier width for P10
powerpc/mm: Fix hugetlb_free_pmd_range() and hugetlb_free_pud_range()
KVM: PPC: Book3S HV: Fix mask size for emulated msgsndp
KVM: PPC: fix comparison to bool warning
KVM: PPC: Book3S: Assign boolean values to a bool variable
powerpc: Inline setup_kup()
powerpc/64s: Mark the kuap/kuep functions non __init
KVM: PPC: Book3S HV: XIVE: Add a comment regarding VP numbering
powerpc/xive: Improve error reporting of OPAL calls
powerpc/xive: Simplify xive_do_source_eoi()
powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW
powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_MASK_FW
powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG
...
258 files changed, 6011 insertions, 4965 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e307f777d942..ae7391627054 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -66,7 +66,7 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config NR_IRQS int "Number of virtual interrupt numbers" - range 32 32768 + range 32 1048576 default "512" help This defines the number of virtual interrupt numbers the kernel @@ -87,7 +87,7 @@ config PPC_WATCHDOG help This is a placeholder when the powerpc hardlockup detector watchdog is selected (arch/powerpc/kernel/watchdog.c). It is - seleted via the generic lockup detector menu which is why we + selected via the generic lockup detector menu which is why we have no standalone config option for it here. config STACKTRACE_SUPPORT @@ -177,6 +177,7 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL @@ -207,6 +208,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC + select HAVE_GENERIC_VDSO select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT @@ -312,6 +314,10 @@ config GENERIC_BUG default y depends on BUG +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config SYS_SUPPORTS_APM_EMULATION default y if PMAC_APM_EMU bool @@ -418,6 +424,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -657,9 +664,15 @@ config IRQ_ALL_CPUS reported with SMP Power Macintoshes with this option enabled. config NUMA - bool "NUMA support" - depends on PPC64 - default y if SMP && PPC_PSERIES + bool "NUMA Memory Allocation and Scheduler Support" + depends on PPC64 && SMP + default y if PPC_PSERIES || PPC_POWERNV + help + Enable NUMA (Non-Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. config NODES_SHIFT int @@ -793,8 +806,7 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ - (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. 
When RAM is mapped by blocks, the alignment needs to fit the size and diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5c8c06215dd4..08cf0eade56a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -374,6 +374,11 @@ ppc64le_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc64le_allnoconfig +ppc64le_allnoconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/ppc64le.config \ + -f $(srctree)/Makefile allnoconfig + PHONY += ppc64_book3e_allmodconfig ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ @@ -405,18 +410,24 @@ PHONY += install install: $(Q)$(MAKE) $(build)=$(boot) install -PHONY += vdso_install -vdso_install: -ifdef CONFIG_PPC64 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ -endif -ifdef CONFIG_VDSO32 - $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ -endif - archclean: $(Q)$(MAKE) $(clean)=$(boot) +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ + $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) +endif + archprepare: checkbin archheaders: diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 1659963a8f1d..ec0b2186e41c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -21,7 +21,11 @@ all: $(obj)/zImage ifdef CROSS32_COMPILE +ifdef CONFIG_CC_IS_CLANG + BOOTCC := $(CROSS32_COMPILE)clang +else BOOTCC := $(CROSS32_COMPILE)gcc +endif BOOTAR := $(CROSS32_COMPILE)ar else BOOTCC := $(CC) diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 6e4efbdb6b7c..f157717ae814 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -21,13 +21,6 @@ extern int lv1_get_logical_ppe_id(u64 *out_1); extern int lv1_get_repository_node_value(u64 in_1, u64 in_2, u64 in_3, u64 in_4, u64 in_5, u64 *out_1, u64 *out_2); -#ifdef DEBUG -#define DBG(fmt...) printf(fmt) -#else -static inline int __attribute__ ((format (printf, 1, 2))) DBG( - const char *fmt, ...) 
{return 0;} -#endif - BSS_STACK(4096); /* A buffer that may be edited by tools operating on a zImage binary so as to diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index d03cdb7606dc..6a92376daf3f 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -42,14 +42,11 @@ udelay: * (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns * timebase_period_ns defaults to 60 (16.6MHz) */ mflr r5 - bl 0f + bcl 20,31,0f 0: mflr r6 mtlr r5 - lis r5,0b@ha - addi r5,r5,0b@l - subf r5,r5,r6 /* In case we're relocated */ - addis r5,r5,timebase_period_ns@ha - lwz r5,timebase_period_ns@l(r5) + addis r5,r6,(timebase_period_ns-0b)@ha + lwz r5,(timebase_period_ns-0b)@l(r5) add r4,r4,r5 addi r4,r4,-1 divw r4,r4,r5 /* BUS ticks */ diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cd58a62e810d..41fa0a8715e3 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -46,6 +46,8 @@ compression=.gz uboot_comp=gzip pie= format= +notext= +rodynamic= # cross-compilation prefix CROSS= @@ -353,6 +355,8 @@ epapr) platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie + notext='-z notext' + rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi) ;; mvme5100) platformo="$object/fixed-head.o $object/mvme5100.o" @@ -493,7 +497,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index a21f3a76e06f..d6f072865627 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -34,6 +34,17 @@ SECTIONS __dynamic_start = .; *(.dynamic) } + +#ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); + .got : + { + __toc_start = .; + *(.got) + *(.toc) + } +#endif + .hash : { *(.hash) } .interp : { *(.interp) } .rela.dyn : @@ -76,16 +87,6 @@ SECTIONS _esm_blob_end = .; } -#ifdef CONFIG_PPC64_BOOT_WRAPPER - . = ALIGN(256); - .got : - { - __toc_start = .; - *(.got) - *(.toc) - } -#endif - . = ALIGN(4096); .bss : { diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config new file mode 100644 index 000000000000..6ea12a12432c --- /dev/null +++ b/arch/powerpc/configs/disable-werror.config @@ -0,0 +1 @@ +CONFIG_PPC_DISABLE_WERROR=y diff --git a/arch/powerpc/configs/ppc64le.config b/arch/powerpc/configs/ppc64le.config new file mode 100644 index 000000000000..14dca1062c1b --- /dev/null +++ b/arch/powerpc/configs/ppc64le.config @@ -0,0 +1,2 @@ +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config new file mode 100644 index 000000000000..1c91a35c6a73 --- /dev/null +++ b/arch/powerpc/configs/security.config @@ -0,0 +1,15 @@ +# This is the equivalent of booting with lockdown=integrity +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_LOCKDOWN_LSM=y +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y + +# These are some general, reasonably inexpensive hardening options +CONFIG_HARDENED_USERCOPY=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y + +# UBSAN bounds checking is very cheap and good for hardening +CONFIG_UBSAN=y +# CONFIG_UBSAN_MISC is not set
\ No newline at end of file diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 8a55eb8cc97b..61c6e8b200e8 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,6 +10,7 @@ #include <linux/types.h> #include <asm/cmpxchg.h> #include <asm/barrier.h> +#include <asm/asm-const.h> /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -26,14 +27,14 @@ static __inline__ int atomic_read(const atomic_t *v) { int t; - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic_set(atomic_t *v, int i) { - __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC_OP(op, asm_op) \ @@ -316,14 +317,14 @@ static __inline__ s64 atomic64_read(const atomic64_t *v) { s64 t; - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); return t; } static __inline__ void atomic64_set(atomic64_t *v, s64 i) { - __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f53c42380832..aecfde829d5d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -40,7 +40,7 @@ #define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* The sub-arch has lwsync */ -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 4a4d3afd5340..299ab33505a6 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -216,15 +216,34 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) */ static inline int fls(unsigned int x) { - return 32 - __builtin_clz(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include <asm-generic/bitops/builtin-__fls.h> +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static inline int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 
64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include <asm-generic/bitops/fls64.h> +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 32fd4452e960..a0117a9d5b06 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -183,11 +183,7 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) unsigned long begin = regs->kuap & 0xf0000000; unsigned long end = regs->kuap << 28; - if (!is_write) - return false; - - return WARN(address < begin || address >= end, - "Bug: write fault blocked by segment registers !"); + return is_write && (address < begin || address >= end); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 2e277ca0170f..685c589e723f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -90,10 +90,11 @@ struct hash_pte { typedef struct { unsigned long id; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; void update_bats(void); +static inline void cleanup_cpu_mmu_context(void) { }; /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 1376be95e975..415ae29fa73a 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -240,8 +240,14 @@ extern void add_hash_page(unsigned context, unsigned long va, unsigned long pmdval); /* Flush an entry from the TLB/hash table */ -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); +static inline void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long ptephys = __pa(ptep) & PAGE_MASK; + + flush_hash_pages(mm->context.id, addr, ptephys, 1); + } +} /* * PTE updates. 
This function is called whenever an existing @@ -293,10 +299,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - if (old & _PAGE_HASHPTE) { - unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } + if (old & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); + return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ @@ -524,9 +529,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index 068085b709fb..d941c06d4f2e 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -6,12 +6,69 @@ /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx */ -extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr); -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +void hash__flush_tlb_mm(struct mm_struct *mm); +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end); + +#ifdef CONFIG_SMP +void _tlbie(unsigned long address); +#else +static inline void _tlbie(unsigned long address) +{ + asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); +} +#endif +void _tlbia(void); + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + /* 603 needs to flush the whole TLB here since it doesn't use a hash table. 
*/ + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + _tlbia(); +} + +static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + start &= PAGE_MASK; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_range(mm, start, end); + else if (end - start <= PAGE_SIZE) + _tlbie(start); + else + _tlbia(); +} + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_mm(mm); + else + _tlbia(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + hash__flush_tlb_page(vma, vmaddr); + else + _tlbie(vmaddr); +} + +static inline void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + flush_range(vma->vm_mm, start, end); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); +} + static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h index 795010897e5d..f1e60d579f6c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -2,6 +2,9 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H #define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +/* We use key 3 for KERNEL */ +#define HASH_DEFAULT_KERNEL_KEY (HPTE_R_KEY_BIT0 | HPTE_R_KEY_BIT1) + static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) { return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | @@ -11,13 +14,23 @@ static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL)); } -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { - return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); + unsigned long pte_pkey; + + pte_pkey = (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? 
HPTE_R_KEY_BIT0 : 0x0UL)); + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) || + mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY)) + return HASH_DEFAULT_KERNEL_KEY; + } + + return pte_pkey; } static inline u16 hash__pte_to_pkey_bits(u64 pteflags) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 73ad038ed10b..d959b0195ad9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -145,7 +145,7 @@ extern void hash__mark_initmem_nx(void); extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge); -extern unsigned long htab_convert_pte_flags(unsigned long pteflags); +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags); /* Atomic PTE updates */ static inline unsigned long hash__pte_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h index 6b5c3a248ba2..d4b9d476ecba 100644 --- a/arch/powerpc/include/asm/book3s/64/kexec.h +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -3,6 +3,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ #define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#include <asm/plpar_wrappers.h> #define reset_sprs reset_sprs static inline void reset_sprs(void) @@ -14,6 +15,10 @@ static inline void reset_sprs(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) { mtspr(SPRN_IAMR, 0); + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_CIABR, 0); + else + plpar_set_ciabr(0); } /* Do we need isync()? We are going via a kexec reset */ diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h deleted file mode 100644 index a39e2d193fdc..000000000000 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H - -#include <linux/const.h> -#include <asm/reg.h> - -#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) -#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) -#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) -#define AMR_KUAP_SHIFT 62 - -#ifdef __ASSEMBLY__ - -.macro kuap_restore_amr gpr1, gpr2 -#ifdef CONFIG_PPC_KUAP - BEGIN_MMU_FTR_SECTION_NESTED(67) - mfspr \gpr1, SPRN_AMR - ld \gpr2, STACK_REGS_KUAP(r1) - cmpd \gpr1, \gpr2 - beq 998f - isync - mtspr SPRN_AMR, \gpr2 - /* No isync required, see kuap_restore_amr() */ -998: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm - -#ifdef CONFIG_PPC_KUAP -.macro kuap_check_amr gpr1, gpr2 -#ifdef CONFIG_PPC_KUAP_DEBUG - BEGIN_MMU_FTR_SECTION_NESTED(67) - mfspr \gpr1, SPRN_AMR - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT -999: tdne \gpr1, \gpr2 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm -#endif - -.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr -#ifdef CONFIG_PPC_KUAP - BEGIN_MMU_FTR_SECTION_NESTED(67) - .ifnb \msr_pr_cr - bne \msr_pr_cr, 99f - .endif - mfspr \gpr1, SPRN_AMR - std \gpr1, STACK_REGS_KUAP(r1) - li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) - sldi \gpr2, \gpr2, AMR_KUAP_SHIFT - cmpd \use_cr, \gpr1, \gpr2 - beq \use_cr, 99f - // We don't isync here because we very recently entered via rfid - mtspr SPRN_AMR, \gpr2 - isync -99: - 
END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif -.endm - -#else /* !__ASSEMBLY__ */ - -#include <linux/jump_label.h> - -DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); - -#ifdef CONFIG_PPC_KUAP - -#include <asm/mmu.h> -#include <asm/ptrace.h> - -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ - if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && unlikely(regs->kuap != amr)) { - isync(); - mtspr(SPRN_AMR, regs->kuap); - /* - * No isync required here because we are about to RFI back to - * previous context before any user accesses would be made, - * which is a CSI. - */ - } -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) { - unsigned long amr = mfspr(SPRN_AMR); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ - WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); - return amr; - } - return 0; -} - -static inline void kuap_check_amr(void) -{ - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) - WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); -} - -/* - * We support individually allowing read or write, but we don't support nesting - * because that would require an expensive read/modify write of the AMR. - */ - -static inline unsigned long get_kuap(void) -{ - /* - * We return AMR_KUAP_BLOCKED when we don't support KUAP because - * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to - * cause restore_user_access to do a flush. - * - * This has no effect in terms of actually blocking things on hash, - * so it doesn't break anything. - */ - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return AMR_KUAP_BLOCKED; - - return mfspr(SPRN_AMR); -} - -static inline void set_kuap(unsigned long value) -{ - if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return; - - /* - * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both - * before and after the move to AMR. See table 6 on page 1134. - */ - isync(); - mtspr(SPRN_AMR, value); - isync(); -} - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0UL; -} - -static inline unsigned long get_kuap(void) -{ - return AMR_KUAP_BLOCKED; -} - -static inline void set_kuap(unsigned long value) { } -#endif /* !CONFIG_PPC_KUAP */ - -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) -{ - // This is written so we can resolve to a single case at build time - BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (dir == KUAP_READ) - set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (dir == KUAP_WRITE) - set_kuap(AMR_KUAP_BLOCK_READ); - else if (dir == KUAP_READ_WRITE) - set_kuap(0); - else - BUILD_BUG(); -} - -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) -{ - set_kuap(AMR_KUAP_BLOCKED); - if (static_branch_unlikely(&uaccess_flush_key)) - do_uaccess_flush(); -} - -static inline unsigned long prevent_user_access_return(void) -{ - unsigned long flags = get_kuap(); - - set_kuap(AMR_KUAP_BLOCKED); - if (static_branch_unlikely(&uaccess_flush_key)) - do_uaccess_flush(); - - return flags; -} - -static inline void restore_user_access(unsigned long flags) -{ - set_kuap(flags); - if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) - do_uaccess_flush(); -} -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h new file mode 100644 index 000000000000..f50f72e535aa --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_H +#define _ASM_POWERPC_BOOK3S_64_KUP_H + +#include <linux/const.h> +#include <asm/reg.h> + +#define AMR_KUAP_BLOCK_READ UL(0x5455555555555555) +#define AMR_KUAP_BLOCK_WRITE UL(0xa8aaaaaaaaaaaaaa) +#define AMR_KUEP_BLOCKED UL(0x5455555555555555) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) + +#ifdef __ASSEMBLY__ + +.macro kuap_user_restore gpr1, gpr2 +#if defined(CONFIG_PPC_PKEY) + BEGIN_MMU_FTR_SECTION_NESTED(67) + b 100f // skip_restore_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) + /* + * AMR and IAMR are going to be different when + * returning to userspace. + */ + ld \gpr1, STACK_REGS_AMR(r1) + + /* + * If kuap feature is not enabled, do the mtspr + * only if AMR value is different. + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + mfspr \gpr2, SPRN_AMR + cmpd \gpr1, \gpr2 + beq 99f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68) + + isync + mtspr SPRN_AMR, \gpr1 +99: + /* + * Restore IAMR only when returning to userspace + */ + ld \gpr1, STACK_REGS_IAMR(r1) + + /* + * If kuep feature is not enabled, do the mtspr + * only if IAMR value is different. + */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + mfspr \gpr2, SPRN_IAMR + cmpd \gpr1, \gpr2 + beq 100f + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUEP, 69) + + isync + mtspr SPRN_IAMR, \gpr1 + +100: //skip_restore_amr + /* No isync required, see kuap_user_restore() */ +#endif +.endm + +.macro kuap_kernel_restore gpr1, gpr2 +#if defined(CONFIG_PPC_PKEY) + + BEGIN_MMU_FTR_SECTION_NESTED(67) + /* + * AMR is going to be mostly the same since we are + * returning to the kernel. Compare and do a mtspr. 
+ */ + ld \gpr2, STACK_REGS_AMR(r1) + mfspr \gpr1, SPRN_AMR + cmpd \gpr1, \gpr2 + beq 100f + isync + mtspr SPRN_AMR, \gpr2 + /* + * No isync required, see kuap_restore_amr() + * No need to restore IAMR when returning to kernel space. + */ +100: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) +#endif +.endm + +#ifdef CONFIG_PPC_KUAP +.macro kuap_check_amr gpr1, gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + /* Prevent access to userspace using any key values */ + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) +#endif +.endm +#endif + +/* + * if (pkey) { + * + * save AMR -> stack; + * if (kuap) { + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * if (from_user) { + * save IAMR -> stack; + * if (kuep) { + * KUEP_BLOCKED ->IAMR + * } + * } + * return; + * } + * + * if (kuap) { + * if (from_kernel) { + * save AMR -> stack; + * if (AMR != BLOCKED) + * KUAP_BLOCKED -> AMR; + * } + * + * } + */ +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#if defined(CONFIG_PPC_PKEY) + + /* + * if both pkey and kuap is disabled, nothing to do + */ + BEGIN_MMU_FTR_SECTION_NESTED(68) + b 100f // skip_save_amr + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68) + + /* + * if pkey is disabled and we are entering from userspace + * don't do anything. + */ + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + /* + * Without pkey we are not changing AMR outside the kernel + * hence skip this completely. + */ + bne \msr_pr_cr, 100f // from userspace + .endif + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67) + + /* + * pkey is enabled or pkey is disabled but entering from kernel + */ + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_AMR(r1) + + /* + * update kernel AMR with AMR_KUAP_BLOCKED only + * if KUAP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(69) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 102f + /* + * We don't isync here because we very recently entered via an interrupt + */ + mtspr SPRN_AMR, \gpr2 + isync +102: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69) + + /* + * if entering from kernel we don't need save IAMR + */ + .ifnb \msr_pr_cr + beq \msr_pr_cr, 100f // from kernel space + mfspr \gpr1, SPRN_IAMR + std \gpr1, STACK_REGS_IAMR(r1) + + /* + * update kernel IAMR with AMR_KUEP_BLOCKED only + * if KUEP feature is enabled + */ + BEGIN_MMU_FTR_SECTION_NESTED(70) + LOAD_REG_IMMEDIATE(\gpr2, AMR_KUEP_BLOCKED) + mtspr SPRN_IAMR, \gpr2 + isync + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUEP, 70) + .endif + +100: // skip_save_amr +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + +#ifdef CONFIG_PPC_PKEY + +#include <asm/mmu.h> +#include <asm/ptrace.h> + +/* + * For kernel thread that doesn't have thread.regs return + * default AMR/IAMR values. 
+ */ +static inline u64 current_thread_amr(void) +{ + if (current->thread.regs) + return current->thread.regs->amr; + return AMR_KUAP_BLOCKED; +} + +static inline u64 current_thread_iamr(void) +{ + if (current->thread.regs) + return current->thread.regs->iamr; + return AMR_KUEP_BLOCKED; +} +#endif /* CONFIG_PPC_PKEY */ + +#ifdef CONFIG_PPC_KUAP + +static inline void kuap_user_restore(struct pt_regs *regs) +{ + bool restore_amr = false, restore_iamr = false; + unsigned long amr, iamr; + + if (!mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + amr = mfspr(SPRN_AMR); + if (amr != regs->amr) + restore_amr = true; + } else { + restore_amr = true; + } + + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + iamr = mfspr(SPRN_IAMR); + if (iamr != regs->iamr) + restore_iamr = true; + } else { + restore_iamr = true; + } + + + if (restore_amr || restore_iamr) { + isync(); + if (restore_amr) + mtspr(SPRN_AMR, regs->amr); + if (restore_iamr) + mtspr(SPRN_IAMR, regs->iamr); + } + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, + unsigned long amr) +{ + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (unlikely(regs->amr != amr)) { + isync(); + mtspr(SPRN_AMR, regs->amr); + /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + */ + } + } + /* + * No need to restore IAMR when returning to kernel space. + */ +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + unsigned long amr = mfspr(SPRN_AMR); + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; + } + return 0; +} + +#else /* CONFIG_PPC_PKEY */ + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +{ +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0; +} + +#endif /* CONFIG_PPC_PKEY */ + + +#ifdef CONFIG_PPC_KUAP + +static inline void kuap_check_amr(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); +} + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline unsigned long get_kuap(void) +{ + /* + * We return AMR_KUAP_BLOCKED when we don't support KUAP because + * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to + * cause restore_user_access to do a flush. + * + * This has no effect in terms of actually blocking things on hash, + * so it doesn't break anything. + */ + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return AMR_KUAP_BLOCKED; + + return mfspr(SPRN_AMR); +} + +static inline void set_kuap(unsigned long value) +{ + if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. 
+ */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, + bool is_write) +{ + if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + return false; + /* + * For radix this will be a storage protection fault (DSISR_PROTFAULT). + * For hash this will be a key fault (DSISR_KEYFAULT) + */ + /* + * We do have exception table entry, but accessing the + * userspace results in fault. This could be because we + * didn't unlock the AMR or access is denied by userspace + * using a key value that blocks access. We are only interested + * in catching the use case of accessing without unlocking + * the AMR. Hence check for BLOCK_WRITE/READ against AMR. + */ + if (is_write) { + return (regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE; + } + return (regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ; +} + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + unsigned long thread_amr = 0; + + // This is written so we can resolve to a single case at build time + BUILD_BUG_ON(!__builtin_constant_p(dir)); + + if (mmu_has_feature(MMU_FTR_PKEY)) + thread_amr = current_thread_amr(); + + if (dir == KUAP_READ) + set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE); + else if (dir == KUAP_WRITE) + set_kuap(thread_amr | AMR_KUAP_BLOCK_READ); + else if (dir == KUAP_READ_WRITE) + set_kuap(thread_amr); + else + BUILD_BUG(); +} + +#else /* CONFIG_PPC_KUAP */ + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ } + +#endif /* !CONFIG_PPC_KUAP */ + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); +} + +static inline unsigned long prevent_user_access_return(void) +{ + unsigned long flags = get_kuap(); + + set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); + + return flags; +} + +static inline void restore_user_access(unsigned long flags) +{ + set_kuap(flags); + if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) + do_uaccess_flush(); +} +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 683a9c7d1b03..066b1d34c7bc 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -452,6 +452,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, #define HPTE_LOCAL_UPDATE 0x1 #define HPTE_NOHPTE_UPDATE 0x2 +#define HPTE_USE_KERNEL_KEY 0x4 extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, @@ -842,6 +843,32 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) unsigned htab_shift_for_mem_size(unsigned long mem_size); -#endif /* __ASSEMBLY__ */ +enum slb_index { + LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ + KSTACK_INDEX = 1, /* Kernel stack map */ +}; +#define slb_esid_mask(ssize) \ + (((ssize) == MMU_SEGSIZE_256M) ? 
ESID_MASK : ESID_MASK_1T) + +static inline unsigned long mk_esid_data(unsigned long ea, int ssize, + enum slb_index index) +{ + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; +} + +static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, + unsigned long flags) +{ + return (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT); +} + +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, + unsigned long flags) +{ + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 750918451dd2..995bbcdd0ef8 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -111,7 +111,7 @@ typedef struct { struct hash_mm_context *hash_context; - unsigned long vdso_base; + void __user *vdso; /* * pagetable fragment support */ @@ -199,7 +199,7 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); -#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_PKEY void pkey_early_init_devtree(void); #else static inline void pkey_early_init_devtree(void) {} diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index cd3feeac6e87..a39886681629 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1231,13 +1231,28 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) return hash__pmd_same(pmd_a, pmd_b); } -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t __pmd_mkhuge(pmd_t pmd) { if (radix_enabled()) return radix__pmd_mkhuge(pmd); return hash__pmd_mkhuge(pmd); } +/* + * pfn_pmd return a pmd_t that can be used as pmd pte entry. 
+ */ +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ +#ifdef CONFIG_DEBUG_VM + if (radix_enabled()) + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0); + else + WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) != + cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)); +#endif + return pmd; +} + #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h index b7d9f4267bcd..3b8640498f5b 100644 --- a/arch/powerpc/include/asm/book3s/64/pkeys.h +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -6,6 +6,8 @@ #include <asm/book3s/64/hash-pkey.h> extern u64 __ro_after_init default_uamor; +extern u64 __ro_after_init default_amr; +extern u64 __ro_after_init default_iamr; static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) { diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 338f36cd9934..464f8ca8a5c9 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -12,7 +12,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr, 5002f +5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -23,7 +23,7 @@ #else .macro EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" -5001: PPC_LONG \addr +5001: .4byte \addr - 5001b .short \flags .org 5001b+BUG_ENTRY_SIZE .previous @@ -36,14 +36,14 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b, %0\n" \ + "2:\t.4byte 1b - 2b, %0 - 2b\n" \ "\t.short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ ".section __bug_table,\"aw\"\n" \ - "2:\t" PPC_LONG "1b\n" \ + "2:\t.4byte 1b - 2b\n" \ "\t.short %2\n" \ ".org 2b+%3\n" \ ".previous\n" @@ -113,6 +113,7 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 82f099ba2411..d5da7ddbf0fc 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -163,7 +163,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ __wsum __csum_partial(const void *buff, int len, __wsum sum); -static inline __wsum csum_partial(const void *buff, int len, __wsum sum) +static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum) { if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) { if (len == 2) diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h new file mode 100644 index 000000000000..0a26ef13a34a --- /dev/null +++ b/arch/powerpc/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_CLOCKSOURCE_H +#define _ASM_POWERPC_CLOCKSOURCE_H + +#include <asm/vdso/clocksource.h> + +#endif /* _ASM_POWERPC_CLOCKSOURCE_H */ diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe931789..3bdd74739cb8 100644 --- 
a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h new file mode 100644 index 000000000000..24be9131f803 --- /dev/null +++ b/arch/powerpc/include/asm/cpu_setup_power.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 IBM Corporation + */ +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power7(void); +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power8(void); +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power9(void); +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power10(void); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3d2f94afc13a..5f21a5bab467 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -41,7 +41,6 @@ extern int machine_check_4xx(struct pt_regs *regs); extern int machine_check_440A(struct pt_regs *regs); extern int machine_check_e500mc(struct pt_regs *regs); extern int machine_check_e500(struct pt_regs *regs); -extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); int machine_check_83xx(struct pt_regs *regs); @@ -137,7 +136,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DBELL ASM_CONST(0x00000004) #define CPU_FTR_CAN_NAP ASM_CONST(0x00000008) #define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010) -#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020) +// ASM_CONST(0x00000020) Free #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040) #define CPU_FTR_LWSYNC ASM_CONST(0x00000080) #define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100) @@ -219,9 +218,7 @@ static inline void cpu_feature_keys_init(void) { } #ifndef __ASSEMBLY__ -#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) - -#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) +#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -369,7 +366,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) @@ -378,38 +375,33 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON) #define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE) -#define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ +#define CPU_FTRS_40X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_44X (CPU_FTR_NOEXECUTE) +#define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \ CPU_FTR_INDEXED_DCR) 
#define CPU_FTRS_47X (CPU_FTRS_440x6) -#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_NOEXECUTE | \ - CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ + CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) -#define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \ + CPU_FTR_NOEXECUTE) +#define CPU_FTRS_E500MC ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) /* * e5500/e6500 erratum A-006958 is a timebase bug that can use the * same workaround as CPU_FTR_CELL_TB_BUG. */ -#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E5500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG) -#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \ +#define CPU_FTRS_E6500 ( \ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -489,7 +481,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else @@ -510,18 +502,19 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | +#ifdef CONFIG_PPC_BOOK3S_604 + CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 | CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 | CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 | - CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | - CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | + CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | +#endif +#ifdef CONFIG_PPC_BOOK3S_603 + CPU_FTRS_603 | CPU_FTRS_82XX | + CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -529,14 +522,10 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X | #endif -#ifdef CONFIG_44x - CPU_FTRS_44X | CPU_FTRS_440x6 | -#endif #ifdef CONFIG_PPC_47x CPU_FTRS_47X | CPU_FTR_476_DD2 | -#endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 | +#elif defined(CONFIG_44x) + CPU_FTRS_44X | CPU_FTRS_440x6 | #endif #ifdef CONFIG_E500 CPU_FTRS_E500 | CPU_FTRS_E500_2 | @@ -548,7 +537,7 @@ enum { }; #endif /* __powerpc64__ */ -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_BOOK3E #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else @@ -557,7 +546,6 @@ enum { #define CPU_FTRS_DT_CPU_BASE \ (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE | \ - CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_STCX_CHECKS_ADDRESS | \ @@ -586,18 +574,19 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & +#ifdef 
CONFIG_PPC_BOOK3S_604 + CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 & CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 & CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 & - CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & - CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & + CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & +#endif +#ifdef CONFIG_PPC_BOOK3S_603 + CPU_FTRS_603 & CPU_FTRS_82XX & + CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & @@ -605,12 +594,11 @@ enum { #ifdef CONFIG_40x CPU_FTRS_40X & #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + CPU_FTRS_47X & +#elif defined(CONFIG_44x) CPU_FTRS_44X & CPU_FTRS_440x6 & #endif -#ifdef CONFIG_E200 - CPU_FTRS_E200 & -#endif #ifdef CONFIG_E500 CPU_FTRS_E500 & CPU_FTRS_E500_2 & #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 53ed2ca40151..b8425e3cfd81 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -168,8 +168,8 @@ do { \ /* Cache size items */ \ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \ - VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \ + NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ + VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\ ARCH_DLINFO_CACHE_GEOMETRY; \ } while (0) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index fbd406cd6916..f6d2acb57425 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -36,6 +36,24 @@ label##2: \ .align 2; \ label##3: + +#ifndef CONFIG_CC_IS_CLANG +#define CHECK_ALT_SIZE(else_size, body_size) \ + .ifgt (else_size) - (body_size); \ + .error "Feature section else case larger than body"; \ + .endif; +#else +/* + * If we use the ifgt syntax above, clang's assembler complains about the + * expression being non-absolute when the code appears in an inline assembly + * statement. + * As a workaround use an .org directive that has no effect if the else case + * instructions are smaller than the body, but fails otherwise. + */ +#define CHECK_ALT_SIZE(else_size, body_size) \ + .org . 
+ ((else_size) > (body_size)); +#endif + #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ label##4: \ .popsection; \ @@ -48,9 +66,7 @@ label##5: \ FTR_ENTRY_OFFSET label##2b-label##5b; \ FTR_ENTRY_OFFSET label##3b-label##5b; \ FTR_ENTRY_OFFSET label##4b-label##5b; \ - .ifgt (label##4b- label##3b)-(label##2b- label##1b); \ - .error "Feature section else case larger than body"; \ - .endif; \ + CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \ .popsection; @@ -100,6 +116,9 @@ label##5: \ #define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ END_MMU_FTR_SECTION_NESTED((msk), (msk), label) +#define END_MMU_FTR_SECTION_NESTED_IFCLR(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), 0, label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0b295bdb201e..aa6a5ef5d483 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); -#else -static inline bool is_kvm_guest(void) { return false; } -#endif - #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index c1fbccb04390..c98f5141e3fc 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -155,6 +155,14 @@ #define H_VASI_RESUMED 5 #define H_VASI_COMPLETED 6 +/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. */ +#define H_VASI_SIGNAL_CANCEL 1 +#define H_VASI_SIGNAL_ABORT 2 +#define H_VASI_SIGNAL_SUSPEND 3 +#define H_VASI_SIGNAL_COMPLETE 4 +#define H_VASI_SIGNAL_ENABLE 5 +#define H_VASI_SIGNAL_FAILOVER 6 + /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 @@ -261,6 +269,7 @@ #define H_ADD_CONN 0x284 #define H_DEL_CONN 0x288 #define H_JOIN 0x298 +#define H_VASI_SIGNAL 0x2A0 #define H_VASI_STATE 0x2A4 #define H_VIOCTL 0x2A8 #define H_ENABLE_CRQ 0x2B0 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 58635960403c..273edd208ec5 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ - : "=r" (ret) : "m" (*addr) : "memory"); \ + : "=r" (ret) : "m"UPD_CONSTR (*addr) : "memory"); \ return ret; \ } @@ -130,7 +130,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ - : "=m" (*addr) : "r" (val) : "memory"); \ + : "=m"UPD_CONSTR (*addr) : "r" (val) : "memory"); \ mmiowb_set_pending(); \ } @@ -302,41 +302,56 @@ static inline unsigned char __raw_readb(const volatile void __iomem *addr) { return *(volatile unsigned char __force *)PCI_FIX_ADDR(addr); } +#define __raw_readb __raw_readb + static inline unsigned short __raw_readw(const volatile void __iomem *addr) { return *(volatile unsigned short __force *)PCI_FIX_ADDR(addr); } +#define __raw_readw __raw_readw + static inline unsigned int __raw_readl(const volatile void __iomem *addr) { return *(volatile unsigned int __force 
*)PCI_FIX_ADDR(addr); } +#define __raw_readl __raw_readl + static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr) { *(volatile unsigned char __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeb __raw_writeb + static inline void __raw_writew(unsigned short v, volatile void __iomem *addr) { *(volatile unsigned short __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writew __raw_writew + static inline void __raw_writel(unsigned int v, volatile void __iomem *addr) { *(volatile unsigned int __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writel __raw_writel #ifdef __powerpc64__ static inline unsigned long __raw_readq(const volatile void __iomem *addr) { return *(volatile unsigned long __force *)PCI_FIX_ADDR(addr); } +#define __raw_readq __raw_readq + static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) { *(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v; } +#define __raw_writeq __raw_writeq static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) { __raw_writeq((__force unsigned long)cpu_to_be64(v), addr); } +#define __raw_writeq_be __raw_writeq_be /* * Real mode versions of the above. Those instructions are only supposed @@ -609,10 +624,37 @@ static inline void name at \ /* Some drivers check for the presence of readq & writeq with * a #ifdef, so we make them happy here. */ +#define readb readb +#define readw readw +#define readl readl +#define writeb writeb +#define writew writew +#define writel writel +#define readsb readsb +#define readsw readsw +#define readsl readsl +#define writesb writesb +#define writesw writesw +#define writesl writesl +#define inb inb +#define inw inw +#define inl inl +#define outb outb +#define outw outw +#define outl outl +#define insb insb +#define insw insw +#define insl insl +#define outsb outsb +#define outsw outsw +#define outsl outsl #ifdef __powerpc64__ #define readq readq #define writeq writeq #endif +#define memset_io memset_io +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -637,7 +679,106 @@ static inline void name at \ #define writel_relaxed(v, addr) writel(v, addr) #define writeq_relaxed(v, addr) writeq(v, addr) +#ifdef CONFIG_GENERIC_IOMAP #include <asm-generic/iomap.h> +#else +/* + * Here comes the implementation of the IOMAP interfaces. 
+ */ +static inline unsigned int ioread16be(const void __iomem *addr) +{ + return readw_be(addr); +} +#define ioread16be ioread16be + +static inline unsigned int ioread32be(const void __iomem *addr) +{ + return readl_be(addr); +} +#define ioread32be ioread32be + +#ifdef __powerpc64__ +static inline u64 ioread64_lo_hi(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_lo_hi ioread64_lo_hi + +static inline u64 ioread64_hi_lo(const void __iomem *addr) +{ + return readq(addr); +} +#define ioread64_hi_lo ioread64_hi_lo + +static inline u64 ioread64be(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be ioread64be + +static inline u64 ioread64be_lo_hi(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_lo_hi ioread64be_lo_hi + +static inline u64 ioread64be_hi_lo(const void __iomem *addr) +{ + return readq_be(addr); +} +#define ioread64be_hi_lo ioread64be_hi_lo +#endif /* __powerpc64__ */ + +static inline void iowrite16be(u16 val, void __iomem *addr) +{ + writew_be(val, addr); +} +#define iowrite16be iowrite16be + +static inline void iowrite32be(u32 val, void __iomem *addr) +{ + writel_be(val, addr); +} +#define iowrite32be iowrite32be + +#ifdef __powerpc64__ +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_lo_hi iowrite64_lo_hi + +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +#define iowrite64_hi_lo iowrite64_hi_lo + +static inline void iowrite64be(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be iowrite64be + +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_lo_hi iowrite64be_lo_hi + +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +#define iowrite64be_hi_lo iowrite64be_hi_lo +#endif /* __powerpc64__ */ + +struct pci_dev; +void pci_iounmap(struct pci_dev *dev, void __iomem *addr); +#define pci_iounmap pci_iounmap +void __iomem *ioport_map(unsigned long port, unsigned int len); +#define ioport_map ioport_map +#endif static inline void iosync(void) { @@ -670,7 +811,6 @@ static inline void iosync(void) #define IO_SPACE_LIMIT ~(0UL) - /** * ioremap - map bus memory into CPU space * @address: bus address of the memory @@ -706,7 +846,13 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, unsigned long flags); extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); +#define ioremap_wc ioremap_wc + +#ifdef CONFIG_PPC32 void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); +#define ioremap_wt ioremap_wt +#endif + void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ @@ -766,6 +912,7 @@ static inline unsigned long virt_to_phys(volatile void * address) return __pa((unsigned long)address); } +#define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual @@ -783,6 +930,7 @@ static inline void * phys_to_virt(unsigned long address) { return (void *)__va(address); } +#define phys_to_virt phys_to_virt /* * Change "struct page" to physical address. 
@@ -810,6 +958,7 @@ static inline unsigned long virt_to_bus(volatile void * address) return 0; return __pa(address) + PCI_DRAM_OFFSET; } +#define virt_to_bus virt_to_bus static inline void * bus_to_virt(unsigned long address) { @@ -817,6 +966,7 @@ static inline void * bus_to_virt(unsigned long address) return NULL; return __va(address - PCI_DRAM_OFFSET); } +#define bus_to_virt bus_to_virt #define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET) @@ -855,6 +1005,8 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) +#include <asm-generic/io.h> + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0d93331d0fab..bf221a2a523e 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -15,11 +15,13 @@ #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 -#include <asm/book3s/64/kup-radix.h> +#include <asm/book3s/64/kup.h> #endif + #ifdef CONFIG_PPC_8xx #include <asm/nohash/32/kup-8xx.h> #endif + #ifdef CONFIG_PPC_BOOK3S_32 #include <asm/book3s/32/kup.h> #endif @@ -42,9 +44,10 @@ #else /* !__ASSEMBLY__ */ -#include <linux/pgtable.h> +extern bool disable_kuep; +extern bool disable_kuap; -void setup_kup(void); +#include <linux/pgtable.h> #ifdef CONFIG_PPC_KUEP void setup_kuep(bool disabled); @@ -80,6 +83,12 @@ static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ +static __always_inline void setup_kup(void) +{ + setup_kuep(disable_kuep); + setup_kuap(disable_kuap); +} + static inline void allow_read_from_user(const void __user *from, unsigned long size) { allow_user_access(NULL, from, size, KUAP_READ); diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h new file mode 100644 index 000000000000..2fca299f7e19 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_KVM_GUEST_H_ +#define _ASM_POWERPC_KVM_GUEST_H_ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(kvm_guest); + +static inline bool is_kvm_guest(void) +{ + return static_branch_unlikely(&kvm_guest); +} + +bool check_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +static inline bool check_kvm_guest(void) { return false; } +#endif + +#endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 744612054c94..abe1b5e82547 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,7 +8,7 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include <asm/firmware.h> +#include <asm/kvm_guest.h> #include <uapi/asm/kvm_para.h> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 475687f24f4a..cf6ebbc16cb4 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -207,7 +207,6 @@ struct machdep_calls { void (*suspend_disable_irqs)(void); void (*suspend_enable_irqs)(void); #endif - int (*suspend_disable_cpu)(void); #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t (*cpu_probe)(const char *, size_t); diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 
89aa8248a57d..e6c27ae843dc 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -228,6 +228,7 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +void flush_erat(void); long __machine_check_early_realmode_p7(struct pt_regs *regs); long __machine_check_early_realmode_p8(struct pt_regs *regs); long __machine_check_early_realmode_p9(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h deleted file mode 100644 index dce274be824a..000000000000 --- a/arch/powerpc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - */ - -#ifndef _ASM_POWERPC_MM_ARCH_HOOKS_H -#define _ASM_POWERPC_MM_ARCH_HOOKS_H - -static inline void arch_remap(struct mm_struct *mm, - unsigned long old_start, unsigned long old_end, - unsigned long new_start, unsigned long new_end) -{ - /* - * mremap() doesn't allow moving multiple vmas so we can limit the - * check to old_start == vdso_base. - */ - if (old_start == mm->context.vdso_base) - mm->context.vdso_base = new_start; -} -#define arch_remap arch_remap - -#endif /* _ASM_POWERPC_MM_ARCH_HOOKS_H */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 255a1837e9f7..80b27f5d9648 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,9 +29,18 @@ */ /* - * Support for KUEP feature. + * Supports KUAP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash */ -#define MMU_FTR_KUEP ASM_CONST(0x00000400) +#define MMU_FTR_BOOK3S_KUAP ASM_CONST(0x00000200) + +/* + * Supports KUEP feature + * key 0 controlling userspace addresses on radix + * Key 3 on hash + */ +#define MMU_FTR_BOOK3S_KUEP ASM_CONST(0x00000400) /* * Support for memory protection keys. 
@@ -120,14 +129,8 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* - * Supports KUAP (key 0 controlling userspace addresses) on radix - */ -#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ -#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ - MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 +#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 (MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) #define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2 #define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE @@ -154,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -163,17 +166,19 @@ enum { #ifdef CONFIG_40x MMU_FTR_TYPE_40x | #endif -#ifdef CONFIG_44x +#ifdef CONFIG_PPC_47x + MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | +#elif defined(CONFIG_44x) MMU_FTR_TYPE_44x | #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX | #endif -#ifdef CONFIG_PPC_47x - MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL | -#endif #ifdef CONFIG_PPC_BOOK3S_32 - MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU | + MMU_FTR_USE_HIGH_BATS | +#endif +#ifdef CONFIG_PPC_83xx + MMU_FTR_NEED_DTLB_SW_LRU | #endif #ifdef CONFIG_PPC_BOOK3E_64 MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | @@ -187,22 +192,47 @@ enum { #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE | +#endif /* CONFIG_PPC_RADIX_MMU */ #ifdef CONFIG_PPC_KUAP - MMU_FTR_RADIX_KUAP | + MMU_FTR_BOOK3S_KUAP | #endif /* CONFIG_PPC_KUAP */ -#endif /* CONFIG_PPC_RADIX_MMU */ #ifdef CONFIG_PPC_MEM_KEYS MMU_FTR_PKEY | #endif #ifdef CONFIG_PPC_KUEP - MMU_FTR_KUEP | + MMU_FTR_BOOK3S_KUEP | #endif /* CONFIG_PPC_KUAP */ 0, }; +#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif +#ifdef CONFIG_PPC_8xx +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx +#endif +#ifdef CONFIG_40x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_40x +#endif +#ifdef CONFIG_PPC_47x +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_47x +#elif defined(CONFIG_44x) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x +#endif +#if defined(CONFIG_E200) || defined(CONFIG_E500) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E +#endif + +#ifndef MMU_FTRS_ALWAYS +#define MMU_FTRS_ALWAYS 0 +#endif + static inline bool early_mmu_has_feature(unsigned long feature) { + if (MMU_FTRS_ALWAYS & feature) + return true; + return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature); } @@ -231,6 +261,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) } #endif + if (MMU_FTRS_ALWAYS & feature) + return true; + if (!(MMU_FTRS_POSSIBLE & feature)) return false; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b42813359f49..d5821834dba9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -263,8 +263,10 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - if (start <= mm->context.vdso_base && mm->context.vdso_base < end) - mm->context.vdso_base = 0; + unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; + + if (start <= vdso_base && vdso_base < end) + 
mm->context.vdso = NULL; } #ifdef CONFIG_PPC_MEM_KEYS @@ -285,7 +287,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, #define thread_pkey_regs_init(thread) #define arch_dup_pkeys(oldmm, mm) -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) { return 0x0UL; } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 567cdc557402..17a4a616436f 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,8 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), - "Bug: fault blocked by AP register !"); + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-40x.h b/arch/powerpc/include/asm/nohash/32/mmu-40x.h index 74f4edb5916e..8a8f13a22cf4 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-40x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-40x.h @@ -57,7 +57,7 @@ typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 28aa3b339c5e..2d92a39d8f2e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -108,7 +108,7 @@ extern unsigned int tlb_44x_index; typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* patch sites */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0bd1b144eb76..478249959baa 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -181,7 +181,7 @@ void mmu_pin_tlb(unsigned long top, bool readonly); typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; void *pte_frag; } mm_context_t; diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index b41004664312..e43a418d3ccd 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -238,7 +238,7 @@ extern unsigned int tlbcam_index; typedef struct { unsigned int id; unsigned int active; - unsigned long vdso_base; + void __user *vdso; } mm_context_t; /* Page size definitions, common between 32 and 64-bit diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 6277e7596ae5..ac75f4ab0dba 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -192,9 +192,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index b1d8fec29169..1edb7243e515 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ 
b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -10,7 +10,6 @@ * - local_flush_tlb_mm(mm, full) flushes the specified mm context on * the local processor * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor - * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages * diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 1dffa3cb16ba..0b63ba7d5917 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1091,9 +1091,9 @@ enum { OPAL_XIVE_IRQ_TRIGGER_PAGE = 0x00000001, OPAL_XIVE_IRQ_STORE_EOI = 0x00000002, OPAL_XIVE_IRQ_LSI = 0x00000004, - OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, - OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, - OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, + OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ }; /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */ diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index d64dfe3ac712..56f217606327 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -16,12 +16,6 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES #endif -#ifdef CONFIG_PTE_64BIT -#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ -#else -#define PTE_FLAGS_OFFSET 0 -#endif - #if defined(CONFIG_PPC_256K_PAGES) || \ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */ diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 9362c94fe3aa..edc08f04aef7 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -10,6 +10,9 @@ #endif #ifdef CONFIG_PPC_SPLPAR +#include <asm/kvm_guest.h> +#include <asm/cputhreads.h> + DECLARE_STATIC_KEY_FALSE(shared_processor); static inline bool is_shared_processor(void) @@ -74,6 +77,21 @@ static inline bool vcpu_is_preempted(int cpu) { if (!is_shared_processor()) return false; + +#ifdef CONFIG_PPC_SPLPAR + if (!is_kvm_guest()) { + int first_cpu = cpu_first_thread_sibling(smp_processor_id()); + + /* + * Preemption can only happen at core granularity. This CPU + * is not preempted if one of the CPU of this core is not + * preempted. 
+ */ + if (cpu_first_thread_sibling(cpu) == first_cpu) + return false; + } +#endif + if (yield_count_of(cpu) & 1) return true; return false; diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f6acabb6c9be..3b7baba01c92 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -82,6 +82,7 @@ struct power_pmu { #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ #define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ +#define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index d37ededca3ee..9acd1fbf1197 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -3,12 +3,59 @@ #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H +#include <linux/bitfield.h> #include <linux/pci.h> #define PNV_OCXL_TL_MAX_TEMPLATE 63 #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +#define PNV_OCXL_ATSD_TIMEOUT 1 + +/* TLB Management Instructions */ +#define PNV_OCXL_ATSD_LNCH 0x00 +/* Radix Invalidate */ +#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0) +/* Radix Invalidation Control + * 0b00 Just invalidate TLB. + * 0b01 Invalidate just Page Walk Cache. + * 0b10 Invalidate TLB, Page Walk Cache, and any + * caching of Partition and Process Table Entries. + */ +#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2) +/* Number and Page Size of translations to be invalidated */ +#define PNV_OCXL_ATSD_LNCH_LP PPC_BITMASK(3, 10) +/* Invalidation Criteria + * 0b00 Invalidate just the target VA. + * 0b01 Invalidate matching PID. 
+ */ +#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12) +/* 0b1: Process Scope, 0b0: Partition Scope */ +#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13) +/* Invalidation Flag */ +#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14) +/* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ +#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17) +/* Defines the large page select + * L=0b0 for 4KB pages + * L=0b1 for large pages) + */ +#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18) +/* Process ID */ +#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38) +/* NoFlush – Assumed to be 0b0 */ +#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40) +#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41) +#define PNV_OCXL_ATSD_AVA 0x08 +#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51) +#define PNV_OCXL_ATSD_STAT 0x10 + int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); @@ -28,4 +75,11 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p void pnv_ocxl_spa_release(void *platform_data); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva); +void pnv_ocxl_unmap_lpar(void __iomem *arva); +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a6e3700c4566..ed161ef2b3ca 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -78,6 +78,9 @@ #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) +#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) +#define IMM_D0(i) (((uintptr_t)(i) >> 16) & 0x3ffff) +#define IMM_D1(i) IMM_L(i) /* * 16-bit immediate helper macros: HA() is for use with sign-extending instrs @@ -230,7 +233,6 @@ #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 @@ -295,6 +297,8 @@ #define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4)) #define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) #define __PPC_XT(s) __PPC_XS(s) +#define __PPC_XSP(s) ((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21) +#define __PPC_XTP(s) __PPC_XSP(s) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) #define __PPC_WS(w) (((w) & 0x1f) << 11) @@ -395,6 +399,14 @@ #define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) #define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_LXVP(xtp, a, i) (0x18000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) +#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_PLXVP(xtp, i, a, pr) \ + ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ 
+ ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) @@ -507,6 +519,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) +#define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 511786f0e40d..cfa814824285 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -251,6 +251,8 @@ n: #define _GLOBAL_TOC(name) _GLOBAL(name) +#define DOTSYM(a) a + #endif /* @@ -495,15 +497,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #endif #ifdef CONFIG_PPC_BOOK3S_64 -#define RFI rfid #define MTMSRD(r) mtmsrd r #define MTMSR_EERI(reg) mtmsrd reg,1 #else -#ifndef CONFIG_40x -#define RFI rfi -#else -#define RFI rfi; b . /* Prevent prefetch past rfi */ -#endif #define MTMSRD(r) mtmsr r #define MTMSR_EERI(reg) mtmsr reg #endif diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c61c859b51a8..8acc3590c971 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -6,6 +6,8 @@ * Copyright (C) 2001 PPC 64 Team, IBM Corp */ +#include <vdso/processor.h> + #include <asm/reg.h> #ifdef CONFIG_VSX @@ -63,14 +65,6 @@ extern int _chrp_type; #endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */ -/* Macros for adjusting thread priority (hardware multi-threading) */ -#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") -#define HMT_low() asm volatile("or 1,1,1 # low priority") -#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority") -#define HMT_medium() asm volatile("or 2,2,2 # medium priority") -#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority") -#define HMT_high() asm volatile("or 3,3,3 # high priority") - #ifdef __KERNEL__ #ifdef CONFIG_PPC64 @@ -170,8 +164,10 @@ struct thread_struct { #endif /* Debug Registers */ struct debug_reg debug; +#ifdef CONFIG_PPC_FPU_REGS struct thread_fp_state fp_state; struct thread_fp_state *fp_save_area; +#endif int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -230,10 +226,6 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#ifdef CONFIG_PPC_MEM_KEYS - unsigned long amr; - unsigned long iamr; -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ @@ -344,7 +336,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) } #ifdef CONFIG_PPC64 -#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) #define spin_begin() HMT_low() @@ -363,8 +354,6 @@ do { \ } \ } while (0) -#else -#define cpu_relax() barrier() #endif /* Check that a certain kernel stack pointer is valid in task_struct p */ @@ -398,20 +387,6 @@ static inline void prefetchw(const void *x) #define HAVE_ARCH_PICK_MMAP_LAYOUT -#ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - if 
(is_32) - return sp & 0x0ffffffffUL; - return sp; -} -#else -static inline unsigned long get_clean_sp(unsigned long sp, int is_32) -{ - return sp; -} -#endif - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55ce..e646c7f218bc 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -378,8 +378,8 @@ struct ps3_system_bus_driver { enum ps3_match_sub_id match_sub_id; struct device_driver core; int (*probe)(struct ps3_system_bus_device *); - int (*remove)(struct ps3_system_bus_device *); - int (*shutdown)(struct ps3_system_bus_device *); + void (*remove)(struct ps3_system_bus_device *); + void (*shutdown)(struct ps3_system_bus_device *); /* int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */ /* int (*resume)(struct ps3_system_bus_device *); */ }; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e2c778c176a3..58f9dc060a7b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,11 +53,19 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; #endif + union { #ifdef CONFIG_PPC_KUAP - unsigned long kuap; + unsigned long kuap; +#endif +#ifdef CONFIG_PPC_PKEY + unsigned long amr; +#endif + }; +#ifdef CONFIG_PPC_PKEY + unsigned long iamr; #endif }; - unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; }; #endif @@ -171,12 +179,6 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) set_thread_flag(TIF_NOERROR); \ } while(0) -struct task_struct; -extern int ptrace_get_reg(struct task_struct *task, int regno, - unsigned long *data); -extern int ptrace_put_reg(struct task_struct *task, int regno, - unsigned long data); - #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f877a576b338..e40a921d78f9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -29,7 +29,6 @@ #include <asm/reg_8xx.h> #define MSR_SF_LG 63 /* Enable 64 bit mode */ -#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ #define MSR_HV_LG 60 /* Hypervisor state */ #define MSR_TS_T_LG 34 /* Trans Mem state: Transactional */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ @@ -69,13 +68,11 @@ #ifdef CONFIG_PPC64 #define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */ -#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */ #define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */ #define MSR_S __MASK(MSR_S_LG) /* Secure state */ #else /* so tests for these bits fail on 32-bit */ #define MSR_SF 0 -#define MSR_ISF 0 #define MSR_HV 0 #define MSR_S 0 #endif @@ -134,7 +131,7 @@ #define MSR_64BIT MSR_SF /* Server variant */ -#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) +#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR #define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) @@ -864,6 +861,7 @@ #define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ #define MMCR0_PMCC 0x000c0000UL /* PMC control */ +#define MMCR0_PMCCEXT ASM_CONST(0x00000200) /* PMCCEXT control */ 
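For illustration, the pt_regs change above makes the KUAP save word and the AMR save word share a single slot, with IAMR getting its own word after it. A minimal user-space sketch (a reduced stand-in, not the kernel's real struct pt_regs) showing that the union gives kuap and amr the same offset:

#include <stdio.h>
#include <stddef.h>

/* Reduced stand-in for the tail of struct pt_regs, illustrative only. */
struct regs_tail {
	unsigned long ppr;
	union {
		unsigned long kuap;	/* KUAP state ...            */
		unsigned long amr;	/* ... saved AMR, same slot  */
	};
	unsigned long iamr;
};

int main(void)
{
	printf("kuap @ %zu, amr @ %zu, iamr @ %zu\n",
	       offsetof(struct regs_tail, kuap),
	       offsetof(struct regs_tail, amr),
	       offsetof(struct regs_tail, iamr));
	return 0;
}

On a 64-bit build kuap and amr report the same offset, which is the point of the union: only one save slot is needed because the saved AMR value is the KUAP state when pkeys are in use.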
#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/ @@ -1203,7 +1201,7 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 -#define SPRN_SPRG_PGDIR SPRN_SPRG2 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif @@ -1232,14 +1230,9 @@ #define SPRN_SPRG_WSCRATCH_MC SPRN_SPRG1 #define SPRN_SPRG_RSCRATCH4 SPRN_SPRG7R #define SPRN_SPRG_WSCRATCH4 SPRN_SPRG7W -#ifdef CONFIG_E200 -#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG6R -#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG6W -#else #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 #endif -#endif #ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 @@ -1419,37 +1412,6 @@ static inline void msr_check_and_clear(unsigned long bits) __msr_check_and_clear(bits); } -#if defined(CONFIG_PPC_CELL) || defined(CONFIG_E500) -#define mftb() ({unsigned long rval; \ - asm volatile( \ - "90: mfspr %0, %2;\n" \ - ASM_FTR_IFSET( \ - "97: cmpwi %0,0;\n" \ - " beq- 90b;\n", "", %1) \ - : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ - rval;}) -#elif defined(CONFIG_PPC_8xx) -#define mftb() ({unsigned long rval; \ - asm volatile("mftbl %0" : "=r" (rval)); rval;}) -#else -#define mftb() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : \ - "=r" (rval) : "i" (SPRN_TBRL)); rval;}) -#endif /* !CONFIG_PPC_CELL */ - -#if defined(CONFIG_PPC_8xx) -#define mftbu() ({unsigned long rval; \ - asm volatile("mftbu %0" : "=r" (rval)); rval;}) -#else -#define mftbu() ({unsigned long rval; \ - asm volatile("mfspr %0, %1" : "=r" (rval) : \ - "i" (SPRN_TBRU)); rval;}) -#endif - -#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) -#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) - #ifdef CONFIG_PPC32 #define mfsrin(v) ({unsigned int rval; \ asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 29a948e0c0f2..262782f08fd4 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -281,18 +281,6 @@ #define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif -#ifdef CONFIG_E200 -#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ -#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */ -#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */ -#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn - fetch for an exception handler */ -#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/ -#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ -#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered - store or cache line push */ -#endif - /* Bit definitions for the HID1 */ #ifdef CONFIG_E500 /* e500v1/v2 */ diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h index aa420561bc10..8df6235d64d1 100644 --- a/arch/powerpc/include/asm/rtas-types.h +++ b/arch/powerpc/include/asm/rtas-types.h @@ -23,14 +23,6 @@ struct rtas_t { struct device_node *dev; /* virtual address pointer */ }; -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - atomic_t error; - struct completion *complete; /* wait on this until working == 0 */ -}; - struct rtas_error_log { /* Byte 0 */ u8 byte0; /* 
Architectural version */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 55f9a154c95d..332e1000ca0f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -23,11 +23,16 @@ #define RTAS_RMOBUF_MAX (64 * 1024) /* RTAS return status codes */ -#define RTAS_NOT_SUSPENDABLE -9004 #define RTAS_BUSY -2 /* RTAS Busy */ #define RTAS_EXTENDED_DELAY_MIN 9900 #define RTAS_EXTENDED_DELAY_MAX 9905 +/* statuses specific to ibm,suspend-me */ +#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */ +#define RTAS_NOT_SUSPENDABLE -9004 /* Partition not suspendable */ +#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ +#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ + /* * In general to call RTAS use rtas_token("string") to lookup * an RTAS token for the given string (e.g. "event-scan"). @@ -242,6 +247,7 @@ extern void __noreturn rtas_restart(char *cmd); extern void rtas_power_off(void); extern void __noreturn rtas_halt(void); extern void rtas_os_term(char *str); +void rtas_activate_firmware(void); extern int rtas_get_sensor(int sensor, int index, int *state); extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); @@ -250,9 +256,7 @@ extern bool rtas_indicator_present(int token, int *maxindex); extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); -extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); -extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_ibm_suspend_me(u64 handle); +int rtas_ibm_suspend_me(int *fw_status); struct rtc_time; extern time64_t rtas_get_boot_time(void); @@ -272,8 +276,13 @@ extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else static inline int clobbering_unread_rtas_event(void) { return 0; } +static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return -EINVAL; +} #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b2035b2f57ce..c4e2d53acd2b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu) extern int cpu_to_core_id(int cpu); extern bool has_big_cores; +extern bool thread_group_shares_l2; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -187,6 +188,7 @@ extern void __cpu_die(unsigned int cpu); /* for UP */ #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() +#define thread_group_shares_l2 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) @@ -199,6 +201,10 @@ static inline const struct cpumask *cpu_smallcore_mask(int cpu) return cpumask_of(cpu); } +static inline const struct cpumask *cpu_l2_cache_mask(int cpu) +{ + return cpumask_of(cpu); +} #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 53115ae61495..3d8a47af7a25 100644 --- a/arch/powerpc/include/asm/thread_info.h 
+++ b/arch/powerpc/include/asm/thread_info.h @@ -77,10 +77,8 @@ struct thread_info { /* how to get the thread information struct from C */ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2f566c1a754c..8f789b597bae 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -15,6 +15,7 @@ #include <asm/processor.h> #include <asm/cpu_has_feature.h> +#include <asm/vdso/timebase.h> /* time.c */ extern unsigned long tb_ticks_per_jiffy; @@ -38,42 +39,12 @@ struct div_result { u64 result_low; }; -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - static inline u64 get_vtb(void) { -#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_ARCH_207S)) return mfspr(SPRN_VTB); -#endif - return 0; -} - -static inline u64 get_tb(void) -{ - unsigned int tbhi, tblo, tbhi2; - - if (IS_ENABLED(CONFIG_PPC64)) - return mftb(); - do { - tbhi = mftbu(); - tblo = mftb(); - tbhi2 = mftbu(); - } while (tbhi != tbhi2); - - return ((u64)tbhi << 32) | tblo; -} - -static inline void set_tb(unsigned int upper, unsigned int lower) -{ - mtspr(SPRN_TBWL, 0); - mtspr(SPRN_TBWU, upper); - mtspr(SPRN_TBWL, lower); + return 0; } /* Accessor functions for the decrementer register. diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 95988870a57b..fa2e76e4093a 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -9,7 +9,7 @@ */ #include <asm/cputable.h> -#include <asm/reg.h> +#include <asm/vdso/timebase.h> #define CLOCK_TICK_RATE 1024000 /* Underlying HZ */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index d97f061fecac..160422a439aa 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -40,9 +40,6 @@ extern void tlb_flush(struct mmu_gather *tlb); /* Get the generic bits... 
*/ #include <asm-generic/tlb.h> -extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, - unsigned long address); - static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 2ff884853f97..8542e9bbeead 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -1,12 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PPC64_VDSO_H__ -#define __PPC64_VDSO_H__ - -#ifdef __KERNEL__ - -/* Default link addresses for the vDSOs */ -#define VDSO32_LBASE 0x0 -#define VDSO64_LBASE 0x0 +#ifndef _ASM_POWERPC_VDSO_H +#define _ASM_POWERPC_VDSO_H /* Default map addresses for 32bit vDSO */ #define VDSO32_MBASE 0x100000 @@ -15,10 +9,17 @@ #ifndef __ASSEMBLY__ -/* Offsets relative to thread->vdso_base */ -extern unsigned long vdso64_rt_sigtramp; -extern unsigned long vdso32_sigtramp; -extern unsigned long vdso32_rt_sigtramp; +#ifdef CONFIG_PPC64 +#include <generated/vdso64-offsets.h> +#endif + +#ifdef CONFIG_VDSO32 +#include <generated/vdso32-offsets.h> +#endif + +#define VDSO64_SYMBOL(base, name) ((unsigned long)(base) + (vdso64_offset_##name)) + +#define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name)) int vdso_getcpu_init(void); @@ -51,6 +52,4 @@ int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* __PPC64_VDSO_H__ */ +#endif /* _ASM_POWERPC_VDSO_H */ diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..c1ba56b82ee5 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_CLOCKSOURCE_H +#define _ASM_POWERPC_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES VDSO_CLOCKMODE_ARCHTIMER + +#endif diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..81671aa365b3 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H + +#ifdef __ASSEMBLY__ + +#include <asm/ppc_asm.h> + +/* + * The macros sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + addi r5, r5, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + cmpwi r3, 0 + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + beqlr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + +.macro cvdso_call_time funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r4 + addi r4, r4, VDSO_DATA_OFFSET + bl DOTSYM(\funct) + PPC_LL r0, 
PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + crclr so + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + blr + .cfi_endproc +.endm + +#else + +#include <asm/vdso/timebase.h> +#include <asm/barrier.h> +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +#define VDSO_HAS_TIME 1 + +static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, + const unsigned long _r4) +{ + register long r0 asm("r0") = _r0; + register unsigned long r3 asm("r3") = _r3; + register unsigned long r4 asm("r4") = _r4; + register int ret asm ("r3"); + + asm volatile( + " sc\n" + " bns+ 1f\n" + " neg %0, %0\n" + "1:\n" + : "=r" (ret), "+r" (r4), "+r" (r0) + : "r" (r3) + : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr"); + + return ret; +} + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz) +{ + return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); +} + +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} + +#ifdef CONFIG_VDSO32 + +#define BUILD_VDSO32 1 + +static __always_inline +int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) +{ + return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); +} +#endif + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + return get_tb(); +} + +const struct vdso_data *__arch_get_vdso_data(void); + +static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok vdso_clocksource_ok + +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. 
+ */ +static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return (cycles - last) * mult; +} +#define vdso_calc_delta vdso_calc_delta + +#ifndef __powerpc64__ +static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) +{ + u32 hi = ns >> 32; + u32 lo = ns; + + lo >>= shift; + lo |= hi << (32 - shift); + hi >>= shift; + + if (likely(hi == 0)) + return lo; + + return ((u64)hi << 32) | lo; +} +#define vdso_shift_ns vdso_shift_ns +#endif + +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd); +#else +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd); +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd); +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd); +#endif +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd); +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, + const struct vdso_data *vd); +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h new file mode 100644 index 000000000000..e072577bc7c0 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_VDSO_PROCESSOR_H +#define _ASM_POWERPC_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* Macros for adjusting thread priority (hardware multi-threading) */ +#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") +#define HMT_low() asm volatile("or 1, 1, 1 # low priority") +#define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority") +#define HMT_medium() asm volatile("or 2, 2, 2 # medium priority") +#define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority") +#define HMT_high() asm volatile("or 3, 3, 3 # high priority") + +#ifdef CONFIG_PPC64 +#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) +#else +#define cpu_relax() barrier() +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h new file mode 100644 index 000000000000..b558b07959ce --- /dev/null +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Common timebase prototypes and such for all ppc machines. + */ + +#ifndef _ASM_POWERPC_VDSO_TIMEBASE_H +#define _ASM_POWERPC_VDSO_TIMEBASE_H + +#include <asm/reg.h> + +/* + * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit + * version below in the else case of the ifdef. 
+ */ +#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_E500)) +#define mftb() ({unsigned long rval; \ + asm volatile( \ + "90: mfspr %0, %2;\n" \ + ASM_FTR_IFSET( \ + "97: cmpwi %0,0;\n" \ + " beq- 90b;\n", "", %1) \ + : "=r" (rval) \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ + rval;}) +#elif defined(CONFIG_PPC_8xx) +#define mftb() ({unsigned long rval; \ + asm volatile("mftbl %0" : "=r" (rval)); rval;}) +#else +#define mftb() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : \ + "=r" (rval) : "i" (SPRN_TBRL)); rval;}) +#endif /* !CONFIG_PPC_CELL */ + +#if defined(CONFIG_PPC_8xx) +#define mftbu() ({unsigned long rval; \ + asm volatile("mftbu %0" : "=r" (rval)); rval;}) +#else +#define mftbu() ({unsigned long rval; \ + asm volatile("mfspr %0, %1" : "=r" (rval) : \ + "i" (SPRN_TBRU)); rval;}) +#endif + +#define mttbl(v) asm volatile("mttbl %0":: "r"(v)) +#define mttbu(v) asm volatile("mttbu %0":: "r"(v)) + +/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ +static inline unsigned long get_tbl(void) +{ + return mftb(); +} + +static inline u64 get_tb(void) +{ + unsigned int tbhi, tblo, tbhi2; + + /* + * We use __powerpc64__ here not CONFIG_PPC64 because we want the compat + * VDSO to use the 32-bit compatible version in the while loop below. + */ + if (__is_defined(__powerpc64__)) + return mftb(); + + do { + tbhi = mftbu(); + tblo = mftb(); + tbhi2 = mftbu(); + } while (tbhi != tbhi2); + + return ((u64)tbhi << 32) | tblo; +} + +static inline void set_tb(unsigned int upper, unsigned int lower) +{ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, upper); + mtspr(SPRN_TBWL, lower); +} + +#endif /* _ASM_POWERPC_VDSO_TIMEBASE_H */ diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..48cf23f1e273 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H +#define _ASM_POWERPC_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <asm/vdso_datapage.h> + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ +static __always_inline +struct vdso_data *__arch_get_k_vdso_data(void) +{ + return vdso_data->data; +} +#define __arch_get_k_vdso_data __arch_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b9ef6cf50ea5..3f958ecf2beb 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -36,6 +36,7 @@ #include <linux/unistd.h> #include <linux/time.h> +#include <vdso/datapage.h> #define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) @@ -45,7 +46,7 @@ #ifdef CONFIG_PPC64 -struct vdso_data { +struct vdso_arch_data { __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ struct { /* Systemcfg version numbers */ __u32 major; /* Major number 0x10 */ @@ -59,13 +60,13 @@ struct vdso_data { __u32 processor; /* Processor type 0x1C */ __u64 processorCount; /* # of physical processors 0x20 */ __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ + __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ + __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* (NU) 0x48 */ + __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */ + __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ __u32 dcache_size; /* L1 d-cache size 0x60 */ __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ __u32 icache_size; /* L1 i-cache size 0x68 */ @@ -78,14 +79,10 @@ struct vdso_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ - __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ - __s64 stamp_xtime_sec; /* xtime secs as at tb_orig_stamp */ - __s64 stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 hrtimer_res; /* hrtimer resolution */ - __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ + + struct vdso_data data[CS_BASES]; }; #else /* CONFIG_PPC64 */ @@ -93,35 +90,27 @@ struct vdso_data { /* * And here is the simpler 32 bits version */ -struct vdso_data { - __u64 tb_orig_stamp; /* Timebase at boot 0x30 */ +struct vdso_arch_data { __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* 0x48 */ - __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */ - __u32 tz_dsttime; /* Type of dst correction 0x5C */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; - __s32 stamp_xtime_sec; /* xtime seconds as at tb_orig_stamp */ - __s32 
stamp_xtime_nsec; /* xtime nsecs as at tb_orig_stamp */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ - __u32 hrtimer_res; /* hrtimer resolution */ - __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ + struct vdso_data data[CS_BASES]; }; #endif /* CONFIG_PPC64 */ -extern struct vdso_data *vdso_data; +extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ -.macro get_datapage ptr, tmp +.macro get_datapage ptr bcl 20, 31, .+4 +999: mflr \ptr - addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l - lwz \tmp, 0(\ptr) - add \ptr, \tmp, \ptr +#if CONFIG_PPC_PAGE_SHIFT > 14 + addis \ptr, \ptr, (_vdso_datapage - 999b)@ha +#endif + addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 309b4d65b74f..9a312b975ca8 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -60,13 +60,13 @@ struct xive_irq_data { }; #define XIVE_IRQ_FLAG_STORE_EOI 0x01 #define XIVE_IRQ_FLAG_LSI 0x02 -#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 -#define XIVE_IRQ_FLAG_MASK_FW 0x08 -#define XIVE_IRQ_FLAG_EOI_FW 0x10 +/* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */ +/* #define XIVE_IRQ_FLAG_MASK_FW 0x08 */ /* P9 DD1.0 workaround */ +/* #define XIVE_IRQ_FLAG_EOI_FW 0x10 */ /* P9 DD1.0 workaround */ #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 /* Special flag set by KVM for excalation interrupts */ -#define XIVE_IRQ_NO_EOI 0x80 +#define XIVE_IRQ_FLAG_NO_EOI 0x80 #define XIVE_INVALID_CHIP_ID -1 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0bf1b900d2..fe2ef598e2ea 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c2722ff36e98..b12d7c049bfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -110,9 +110,11 @@ int main(void) #ifdef CONFIG_BOOKE OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif +#ifdef CONFIG_PPC_FPU OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); +#endif OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC @@ -354,10 +356,15 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_PKEY + STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); + STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); +#endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -398,47 +405,18 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); - OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); - OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); - OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); - OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); - OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); - OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); - OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); - OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec); - OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); - OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); - OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); + OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 - OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); - OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); - OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); - OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); - OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); - OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); - OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); -#endif - OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec); - OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec); - OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec); - OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec); - OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec); - OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec); - /* timeval/timezone offsets for use by vdso */ - OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); - OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); - - /* Other bits used by the vdso */ - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_MAX, CLOCK_TAI); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(EINVAL, EINVAL); - DEFINE(KTIME_LOW_RES, KTIME_LOW_RES); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size); + OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map); + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map); +#else + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); +#endif #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65ab9fcebd31..6f903e9aa20b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -655,11 +655,27 @@ static unsigned int index_dir_to_cpu(struct cache_index_dir *index) * On big-core systems, each core has two groups of CPUs each of which * has its own L1-cache. The thread-siblings which share l1-cache with * @cpu can be obtained via cpu_smallcore_mask(). + * + * On some big-core systems, the L2 cache is shared only between some + * groups of siblings. This is already parsed and encoded in + * cpu_l2_cache_mask(). 
+ * + * TODO: cache_lookup_or_instantiate() needs to be made aware of the + * "ibm,thread-groups" property so that cache->shared_cpu_map + * reflects the correct siblings on platforms that have this + * device-tree property. This helper function is only a stop-gap + * solution so that we report the correct siblings to the + * userspace via sysfs. */ -static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache) +static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) { - if (cache->level == 1) - return cpu_smallcore_mask(cpu); + if (has_big_cores) { + int cpu = index_dir_to_cpu(index); + if (cache->level == 1) + return cpu_smallcore_mask(cpu); + if (cache->level == 2 && thread_group_shares_l2) + return cpu_l2_cache_mask(cpu); + } return &cache->shared_cpu_map; } @@ -670,17 +686,11 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; - if (has_big_cores) { - cpu = index_dir_to_cpu(index); - mask = get_big_core_shared_cpu_map(cpu, cache); - } else { - mask = &cache->shared_cpu_map; - } + mask = get_shared_cpu_map(index, cache); return cpumap_print_to_pagebuf(list, buf, mask); } diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1d308780e0d3..4bf33f1b4193 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E200 -_GLOBAL(__setup_cpu_e200) - /* enable dedicated debug exception handling resources (Debug APU) */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_DAPUEN@l - mtspr SPRN_HID0,r3 - b __setup_e200_ivors -#endif /* CONFIG_E200 */ - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S deleted file mode 100644 index 704e8b9501ee..000000000000 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains low level CPU setup functions. - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - */ - -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cache.h> -#include <asm/book3s/64/mmu-hash.h> - -/* Entry: r3 = crap, r4 = ptr to cputable entry - * - * Note that we can be called twice for pseudo-PVRs - */ -_GLOBAL(__setup_cpu_power7) - mflr r11 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power7) - mflr r11 - mfmsr r3 - rldicl. 
r0,r3,4,63 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__setup_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__restore_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -__init_hvmode_206: - /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ - mfmsr r3 - rldicl. 
r0,r3,4,63 - bnelr - ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST) - andc r5,r5,r6 - std r5,CPU_SPEC_FEATURES(r4) - blr - -__init_LPCR_ISA206: - /* Setup a sane LPCR: - * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 - * - * LPES = 0b01 (HSRR0/1 used for 0x500) - * PECE = 0b111 - * DPFD = 4 - * HDICE = 0 - * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) - * VRMASD = 0b10000 (L=1, LP=00) - * - * Other bits untouched for now - */ - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 - - /* POWER9 has no VRMASD */ -__init_LPCR_ISA300: - rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 - ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,4 - rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 - clrrdi r3,r3,1 /* clear HDICE */ - li r5,4 - rldimi r3,r5, LPCR_VC_SH, 0 - mtspr SPRN_LPCR,r3 - isync - blr - -__init_FSCR_power10: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_PREFIX - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR_power9: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_SCV - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR: - mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_EBB - mtspr SPRN_FSCR,r3 - blr - -__init_HFSCR: - mfspr r3,SPRN_HFSCR - ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP - mtspr SPRN_HFSCR,r3 - blr - -__init_PMU_HV: - li r5,0 - mtspr SPRN_MMCRC,r5 - blr - -__init_PMU_HV_ISA207: - li r5,0 - mtspr SPRN_MMCRH,r5 - blr - -__init_PMU: - li r5,0 - mtspr SPRN_MMCRA,r5 - mtspr SPRN_MMCR0,r5 - mtspr SPRN_MMCR1,r5 - mtspr SPRN_MMCR2,r5 - blr - -__init_PMU_ISA207: - li r5,0 - mtspr SPRN_MMCRS,r5 - blr - -__init_PMU_ISA31: - li r5,0 - mtspr SPRN_MMCR3,r5 - LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) - mtspr SPRN_MMCRA,r5 - blr diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c new file mode 100644 index 000000000000..3cca88ee96d7 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, Jordan Niethe, IBM Corporation. + * + * This file contains low level CPU setup functions. + * Originally written in assembly by Benjamin Herrenschmidt & various other + * authors. 
+ */ + +#include <asm/reg.h> +#include <asm/synch.h> +#include <linux/bitops.h> +#include <asm/cputable.h> +#include <asm/cpu_setup_power.h> + +/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ +static bool init_hvmode_206(struct cpu_spec *t) +{ + u64 msr; + + msr = mfmsr(); + if (msr & MSR_HV) + return true; + + t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST); + return false; +} + +static void init_LPCR_ISA300(u64 lpcr, u64 lpes) +{ + /* POWER9 has no VRMASD */ + lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES; + lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2; + lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD; + lpcr &= ~LPCR_HDICE; /* clear HDICE */ + lpcr |= (4ull << LPCR_VC_SH); + mtspr(SPRN_LPCR, lpcr); + isync(); +} + +/* + * Setup a sane LPCR: + * Called with initial LPCR and desired LPES 2-bit value + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ +static void init_LPCR_ISA206(u64 lpcr, u64 lpes) +{ + lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD; + init_LPCR_ISA300(lpcr, lpes); +} + +static void init_FSCR(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_TAR|FSCR_EBB; + mtspr(SPRN_FSCR, fscr); +} + +static void init_FSCR_power9(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_SCV; + mtspr(SPRN_FSCR, fscr); + init_FSCR(); +} + +static void init_FSCR_power10(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_PREFIX; + mtspr(SPRN_FSCR, fscr); + init_FSCR_power9(); +} + +static void init_HFSCR(void) +{ + u64 hfscr; + + hfscr = mfspr(SPRN_HFSCR); + hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\ + HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP; + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_PMU_HV(void) +{ + mtspr(SPRN_MMCRC, 0); +} + +static void init_PMU_HV_ISA207(void) +{ + mtspr(SPRN_MMCRH, 0); +} + +static void init_PMU(void) +{ + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static void init_PMU_ISA207(void) +{ + mtspr(SPRN_MMCRS, 0); +} + +static void init_PMU_ISA31(void) +{ + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +} + +/* + * Note that we can be called twice of pseudo-PVRs. + * The parameter offset is not used. 
+ */ + +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) +{ + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __restore_cpu_power7(void) +{ + u64 msr; + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __restore_cpu_power8(void) +{ + u64 msr; + + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power9(); + init_PMU(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power9(void) +{ + u64 msr; + + init_FSCR_power9(); + init_PMU(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power10(void) +{ + u64 msr; + + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 29de58d4dfb7..65f35ec052d4 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -36,7 +36,6 @@ const char *powerpc_base_platform; * and ppc64 */ #ifdef CONFIG_PPC32 -extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); @@ -60,19 +59,15 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct 
cpu_spec* spec); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +#include <asm/cpu_setup_power.h> extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); -extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power7(void); -extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power8(void); -extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power9(void); -extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power10(void); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -616,46 +611,8 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_6xx - { /* 603 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00030000, - .cpu_name = "603", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603e */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00060000, - .cpu_name = "603e", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00070000, - .cpu_name = "603ev", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, +#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1145,6 +1102,47 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc7450", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_PPC_BOOK3S_603 + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = 
CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00810000, @@ -1234,6 +1232,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1246,7 +1246,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_6xx */ +#endif /* CONFIG_PPC_BOOK3S_604 */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, @@ -1540,6 +1541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_40x */ #ifdef CONFIG_44x +#ifndef CONFIG_PPC_47x { .pvr_mask = 0xf0000fff, .pvr_value = 0x40000850, @@ -1822,7 +1824,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, -#ifdef CONFIG_PPC_47x + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +#else /* CONFIG_PPC_47x */ { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1879,65 +1893,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, -#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, - .cpu_name = "(generic 44x PPC)", - .cpu_features = CPU_FTRS_44X, + .cpu_name = "(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, + .mmu_features = MMU_FTR_TYPE_47x, .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", } +#endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ -#ifdef CONFIG_E200 - { /* e200z5 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81000000, - .cpu_name = "e200z5", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* e200z6 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81100000, - .cpu_name = "e200z6", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E200 PPC)", - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - 
.mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_e200, - .machine_check = machine_check_e200, - .platform = "ppc5554", - } -#endif /* CONFIG_E200 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1098863e17ee..b5478b72c08c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -69,7 +69,6 @@ static int hv_mode; static struct { u64 lpcr; - u64 lpcr_clear; u64 hfscr; u64 fscr; u64 pcr; @@ -79,24 +78,7 @@ static void (*init_pmu_registers)(void); static void __restore_cpu_cpufeatures(void) { - u64 lpcr; - - /* - * LPCR is restored by the power on engine already. It can be changed - * after early init e.g., by radix enable, and we have no unified API - * for saving and restoring such SPRs. - * - * This ->restore hook should really be removed from idle and register - * restore moved directly into the idle restore code, because this code - * doesn't know how idle is implemented or what it needs restored here. - * - * The best we can do to accommodate secondary boot and idle restore - * for now is "or" LPCR with existing. - */ - lpcr = mfspr(SPRN_LPCR); - lpcr |= system_registers.lpcr; - lpcr &= ~system_registers.lpcr_clear; - mtspr(SPRN_LPCR, lpcr); + mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); @@ -273,13 +255,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f) return 1; } -static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f) -{ - cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN; - - return 1; -} - static int __init feat_enable_idle_stop(struct dt_cpu_feature *f) { u64 lpcr; @@ -317,7 +292,6 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; - system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -454,6 +428,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) @@ -641,7 +616,7 @@ static struct dt_cpu_feature_match __initdata {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, - {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, + /* alignment-interrupt-dsisr ignored */ {"idle-stop", feat_enable_idle_stop, 0}, {"machine-check-power8", feat_enable_mce_power8, 0}, {"performance-monitor-power8", feat_enable_pmu_power8, 0}, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8cdc8bcde703..1c9b0ccc2172 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -234,7 +234,10 @@ transfer_to_handler_cont: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -263,7 +266,10 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 - RFI + rfi +#ifdef CONFIG_40x + b . 
/* Prevent prefetch past rfi */ +#endif reenable_mmu: /* @@ -321,7 +327,10 @@ stack_ovf: #endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif @@ -439,15 +448,13 @@ syscall_exit_cont: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f +#endif /* CONFIG_PPC_47x */ 1: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) -#endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -470,7 +477,10 @@ syscall_exit_finish: #endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 @@ -600,7 +610,10 @@ ret_from_kernel_syscall: #endif mtspr SPRN_SRR0, r9 mtspr SPRN_SRR1, r10 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* @@ -671,7 +684,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except_full #ifdef CONFIG_PPC_BOOK3S_32 @@ -803,7 +816,10 @@ fast_exception_return: REST_GPR(9, r11) REST_GPR(12, r11) lwz r11,GPR11(r11) - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) @@ -948,10 +964,7 @@ restore_kuap: /* interrupts are hard-disabled at this point */ restore: -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION - b 1f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 @@ -1027,7 +1040,7 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - RFI + rfi _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1356,7 +1369,7 @@ _GLOBAL(enter_rtas) stw r7, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI + rfi 1: tophys_novmstack r9, r1 #ifdef CONFIG_VMAP_STACK li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ @@ -1371,6 +1384,6 @@ _GLOBAL(enter_rtas) stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI /* return to caller */ + rfi /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f3846192ec7..aa1af139d947 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -653,8 +653,8 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR - bne .Lfast_user_interrupt_return - kuap_restore_amr r3, r4 + bne .Lfast_user_interrupt_return_amr + kuap_kernel_restore r3, r4 andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return @@ -674,6 +674,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +.Lfast_user_interrupt_return_amr: + kuap_user_restore r3, r4 .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -967,7 +969,7 @@ _GLOBAL(enter_prom) mtsrr1 r11 rfi #else /* CONFIG_PPC_BOOK3E */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f579ce46eef2..74d07dc0bb48 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1023,7 +1023,7 @@ storage_fault_common: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d01f09ecf80..e02ad6fefa46 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1059,7 +1059,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -2875,7 +2875,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -3259,7 +3259,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b interrupt_return /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index fe48d319d490..c9e2819b095a 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include <asm/firmware.h> +#include <asm/kvm_guest.h> #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; @@ -21,17 +22,19 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +DEFINE_STATIC_KEY_FALSE(kvm_guest); +bool check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return 0; + return false; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + return false; - return 1; + static_branch_enable(&kvm_guest); + return true; } #endif diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7c767765071d..541664d95702 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -40,38 +40,31 @@ .macro EXCEPTION_PROLOG_1 for_rtas=0 #ifdef CONFIG_VMAP_STACK - mr r11, r1 + mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r1,SPRN_SPRG_THREAD lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +1: + mtcrf 0x7f, r1 + bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#endif -1: - tophys_novmstack r11, r11 -#ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow +1: tophys(r11, r11) #endif .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 #ifdef 
CONFIG_VMAP_STACK - mtcr r10 - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 isync -#else - stw r10,_CCR(r11) /* save registers */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH0 -#ifdef CONFIG_VMAP_STACK + mfspr r11, SPRN_SPRG_SCRATCH2 stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -80,14 +73,12 @@ stw r1,0(r11) tovirt(r1, r11) /* set new kernel sp */ #endif + stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) - stw r10,GPR10(r11) -#ifdef CONFIG_VMAP_STACK - mfcr r10 - stw r10, _CCR(r11) -#endif + mfspr r10,SPRN_SPRG_SCRATCH0 mfspr r12,SPRN_SPRG_SCRATCH1 + stw r10,GPR10(r11) stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) @@ -101,7 +92,6 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) - andi. r10, r9, MSR_PR lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 @@ -222,7 +212,10 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1510b2a56669..ece7f97bafff 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -41,6 +41,11 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> #include <asm/feature-fixups.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -417,6 +422,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -445,10 +454,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -829,7 +834,7 @@ __secondary_start: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ /* @@ -865,8 +870,7 @@ enable_64b_mode: oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_64BIT | MSR_ISF)@highest - sldi r12,r12,48 + LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 isync @@ -966,7 +970,7 @@ start_here_multiplatform: ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . 
/* prevent speculative execution */ /* This is where all platforms converge execution */ @@ -990,7 +994,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ee0bfebc375f..52702f3db6df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -43,16 +43,6 @@ .endm /* - * We need an ITLB miss handler for kernel addresses if: - * - Either we have modules - * - Or we have not pinned the first 8M - */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ - defined(CONFIG_DEBUG_PAGEALLOC) -#define ITLB_MISS_KERNEL 1 -#endif - -/* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ @@ -190,32 +180,31 @@ SystemCall: */ #ifdef CONFIG_8xx_CPU15 -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \ - addi addr, addr, PAGE_SIZE; \ - tlbie addr; \ - addi addr, addr, -(PAGE_SIZE << 1); \ - tlbie addr; \ - addi addr, addr, PAGE_SIZE +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp #else -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif InstructionTLBMiss: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r10 + mtspr SPRN_M_TW, r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r10) + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -241,8 +230,8 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -251,14 +240,14 @@ InstructionTLBMiss: 0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi #endif . 
= 0x1200 DataStoreTLBMiss: - mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -297,11 +286,11 @@ DataStoreTLBMiss: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + mtspr SPRN_DAR, r11 /* Tag DAR */ /* Restore registers */ -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ +0: mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_1 @@ -311,8 +300,7 @@ DataStoreTLBMiss: 0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi #endif @@ -619,10 +607,6 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif -#ifndef CONFIG_PIN_TLB_TEXT - li r0, 0 - mtspr SPRN_MI_CTR, r0 -#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -718,7 +702,6 @@ initial_mmu: mtspr SPRN_DER, r8 blr -#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -740,7 +723,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia -#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -761,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 -#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) @@ -819,7 +801,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi -#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index a0dda2a1f2df..349bf3f0c3af 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -155,10 +155,8 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -BEGIN_MMU_FTR_SECTION bl reloc_offset bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -207,7 +205,7 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ + rfi /* enables MMU */ /* * We need __secondary_hold as a place to hold the other cpus on @@ -288,51 +286,35 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD BEGIN_MMU_FTR_SECTION + mtspr SPRN_SPRG_SCRATCH2,r10 + mfspr r10, SPRN_SPRG_THREAD stw r11, THR11(r10) mfspr r10, SPRN_DSISR mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif mfspr r10, SPRN_SPRG_THREAD beq hash_page_dsi .Lhash_page_dsi_cont: mtcr r11 lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. 
r11, r11, MSR_PR - + mfspr r10, SPRN_SPRG_SCRATCH2 +MMU_FTR_SECTION_ELSE + b 1f +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 -FTR_SECTION_ELSE +MMU_FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif /* CONFIG_VMAP_STACK */ @@ -394,6 +376,7 @@ Alignment: . = 0x800 DO_KVM 0x800 FPUnavailable: +#ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* * Certain Freescale cores don't have a FPU and treat fp instructions @@ -407,6 +390,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +#else + b ProgramCheck +#endif /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) @@ -453,13 +439,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + rlwinm r2, r2, 28, 0xfffff000 +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -519,8 +506,9 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -595,8 +583,9 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -757,14 +746,14 @@ fast_hash_page_return: /* DSI */ mtcr r11 lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + mfspr r10, SPRN_SPRG_SCRATCH2 + rfi 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + rfi stack_overflow: vmap_stack_overflow_exception @@ -889,9 +878,12 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -899,7 +891,7 @@ __secondary_start: ori 
r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -920,9 +912,6 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - addis r3, r3, Hash@ha - stw r6, Hash@l(r3) blr load_up_mmu: @@ -931,11 +920,13 @@ load_up_mmu: tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ TLBSYNC /* ... on all CPUs */ +BEGIN_MMU_FTR_SECTION /* Load the SDR1 register (hash table base & size) */ lis r6,_SDR1@ha tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Load the BAT registers with the values set up by MMU_init. */ lis r3,BATS@ha @@ -991,9 +982,12 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* stack */ lis r1,init_thread_union@ha @@ -1027,7 +1021,7 @@ start_here: .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - RFI + rfi /* Load up the kernel context */ 2: bl load_up_mmu @@ -1051,7 +1045,7 @@ start_here: ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi /* * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); @@ -1073,16 +1067,22 @@ _ENTRY(switch_mmu_context) li r0,NUM_USER_SEGMENTS mtctr r0 - lwz r4, MM_PGD(r4) #ifdef CONFIG_BDI_SWITCH /* Context switch the PTE pointer for the Abatron BDI2000. * The PGDIR is passed as second argument. */ + lwz r4, MM_PGD(r4) lis r5, abatron_pteptrs@ha stw r4, abatron_pteptrs@l + 0x4(r5) #endif +BEGIN_MMU_FTR_SECTION +#ifndef CONFIG_BDI_SWITCH + lwz r4, MM_PGD(r4) +#endif tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) li r4,0 isync 3: @@ -1166,7 +1166,7 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - RFI + rfi 1: bl clear_bats lis r3, BATS@ha addi r3, r3, BATS@l @@ -1185,7 +1185,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - RFI + rfi flush_tlbs: lis r10, 0x40 @@ -1206,7 +1206,7 @@ mmu_off: mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 sync - RFI + rfi /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 71c359d438b5..74e230c200fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm @@ -185,7 +185,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check - * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. 
This @@ -201,7 +200,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx -/* only on e500mc/e200 */ +/* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 586a6ac501e9..fdd4d274c245 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -187,9 +187,6 @@ set_ivor: /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l -#ifdef CONFIG_E200 - oris r2,r2,MAS4_TLBSELD(1)@h -#endif mtspr SPRN_MAS4, r2 #if !defined(CONFIG_BDI_SWITCH) @@ -362,13 +359,7 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ -#ifdef CONFIG_E200 - /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ - machine_check_exception) -#else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) -#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -400,15 +391,9 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else -#ifdef CONFIG_E200 - /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_STD) -#else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ unknown_exception, EXC_XFER_STD) #endif -#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -625,7 +610,7 @@ END_BTB_FLUSH_SECTION mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage -/* Define SPE handlers for e200 and e500v2 */ +/* Define SPE handlers for e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -807,31 +792,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #endif 3: mtspr SPRN_MAS2, r12 -#ifdef CONFIG_E200 - /* Round robin TLB1 entries assignment */ - mfspr r12, SPRN_MAS0 - - /* Extract TLB1CFG(NENTRY) */ - mfspr r11, SPRN_TLB1CFG - andi. r11, r11, 0xfff - - /* Extract MAS0(NV) */ - andi. r13, r12, 0xfff - addi r13, r13, 1 - cmpw 0, r13, r11 - addi r12, r12, 1 - - /* check if we need to wrap */ - blt 7f - - /* wrap back to first free tlbcam entry */ - lis r13, tlbcam_index@ha - lwz r13, tlbcam_index@l(r13) - rlwimi r12, r13, 0, 20, 31 -7: - mtspr SPRN_MAS0,r12 -#endif /* CONFIG_E200 */ - tlb_write_entry: tlbwe @@ -933,21 +893,6 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E200 -/* Adjust or setup IVORs for e200 */ -_GLOBAL(__setup_e200_ivors) - li r3,DebugDebug@l - mtspr SPRN_IVOR15,r3 - li r3,SPEUnavailable@l - mtspr SPRN_IVOR32,r3 - li r3,SPEFloatingPointData@l - mtspr SPRN_IVOR33,r3 - li r3,SPEFloatingPointRound@l - mtspr SPRN_IVOR34,r3 - sync - blr -#endif - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f4e8f21046f5..8fc7a14e4d71 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -499,6 +499,11 @@ static bool is_larx_stcx_instr(int type) return type == LARX || type == STCX; } +static bool is_octword_vsx_instr(int type, int size) +{ + return ((type == LOAD_VSX || type == STORE_VSX) && size == 32); +} + /* * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. 
@@ -549,6 +554,58 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, return true; } +static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, + int *hit, unsigned long ea) +{ + int i; + unsigned long hw_end_addr; + + /* + * Handle spurious exception only when any bp_per_reg is set. + * Otherwise this might be created by xmon and not actually a + * spurious exception. + */ + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + + hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + + /* + * Ending address of DAWR range is less than starting + * address of op. + */ + if ((hw_end_addr - 1) >= ea) + continue; + + /* + * Those addresses need to be in the same or in two + * consecutive 512B blocks; + */ + if (((hw_end_addr - 1) >> 10) != (ea >> 10)) + continue; + + /* + * 'op address + 64B' generates an address that has a + * carry into bit 52 (crosses 2K boundary). + */ + if ((ea & 0x800) == ((ea + 64) & 0x800)) + continue; + + break; + } + + if (i == nr_wp_slots()) + return; + + for (i = 0; i < nr_wp_slots(); i++) { + if (info[i]) { + hit[i] = 1; + info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } + } +} + int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -607,8 +664,14 @@ int hw_breakpoint_handler(struct die_args *args) goto reset; if (!nr_hit) { - rc = NOTIFY_DONE; - goto out; + /* Workaround for Power10 DD1 */ + if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && + is_octword_vsx_instr(type, size)) { + handle_p10dd1_spurious_exception(info, hit, ea); + } else { + rc = NOTIFY_DONE; + goto out; + } } /* diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 9fe4fb3b08aa..72862a4d3a5d 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -11,177 +11,11 @@ #include <asm/pci-bridge.h> #include <asm/isa-bridge.h> -/* - * Here comes the ppc64 implementation of the IOMAP - * interfaces. 
- */ -unsigned int ioread8(const void __iomem *addr) -{ - return readb(addr); -} -unsigned int ioread16(const void __iomem *addr) -{ - return readw(addr); -} -unsigned int ioread16be(const void __iomem *addr) -{ - return readw_be(addr); -} -unsigned int ioread32(const void __iomem *addr) -{ - return readl(addr); -} -unsigned int ioread32be(const void __iomem *addr) -{ - return readl_be(addr); -} -EXPORT_SYMBOL(ioread8); -EXPORT_SYMBOL(ioread16); -EXPORT_SYMBOL(ioread16be); -EXPORT_SYMBOL(ioread32); -EXPORT_SYMBOL(ioread32be); -#ifdef __powerpc64__ -u64 ioread64(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_lo_hi(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_hi_lo(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64be(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_lo_hi(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_hi_lo(const void __iomem *addr) -{ - return readq_be(addr); -} -EXPORT_SYMBOL(ioread64); -EXPORT_SYMBOL(ioread64_lo_hi); -EXPORT_SYMBOL(ioread64_hi_lo); -EXPORT_SYMBOL(ioread64be); -EXPORT_SYMBOL(ioread64be_lo_hi); -EXPORT_SYMBOL(ioread64be_hi_lo); -#endif /* __powerpc64__ */ - -void iowrite8(u8 val, void __iomem *addr) -{ - writeb(val, addr); -} -void iowrite16(u16 val, void __iomem *addr) -{ - writew(val, addr); -} -void iowrite16be(u16 val, void __iomem *addr) -{ - writew_be(val, addr); -} -void iowrite32(u32 val, void __iomem *addr) -{ - writel(val, addr); -} -void iowrite32be(u32 val, void __iomem *addr) -{ - writel_be(val, addr); -} -EXPORT_SYMBOL(iowrite8); -EXPORT_SYMBOL(iowrite16); -EXPORT_SYMBOL(iowrite16be); -EXPORT_SYMBOL(iowrite32); -EXPORT_SYMBOL(iowrite32be); -#ifdef __powerpc64__ -void iowrite64(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_lo_hi(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_hi_lo(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64be(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_lo_hi(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_hi_lo(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -EXPORT_SYMBOL(iowrite64); -EXPORT_SYMBOL(iowrite64_lo_hi); -EXPORT_SYMBOL(iowrite64_hi_lo); -EXPORT_SYMBOL(iowrite64be); -EXPORT_SYMBOL(iowrite64be_lo_hi); -EXPORT_SYMBOL(iowrite64be_hi_lo); -#endif /* __powerpc64__ */ - -/* - * These are the "repeat read/write" functions. Note the - * non-CPU byte order. We do things in "IO byteorder" - * here. - * - * FIXME! We could make these do EEH handling if we really - * wanted. Not clear if we do. 
- */ -void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsb(addr, dst, count); -} -void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsw(addr, dst, count); -} -void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsl(addr, dst, count); -} -EXPORT_SYMBOL(ioread8_rep); -EXPORT_SYMBOL(ioread16_rep); -EXPORT_SYMBOL(ioread32_rep); - -void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesb(addr, src, count); -} -void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesw(addr, src, count); -} -void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesl(addr, src, count); -} -EXPORT_SYMBOL(iowrite8_rep); -EXPORT_SYMBOL(iowrite16_rep); -EXPORT_SYMBOL(iowrite32_rep); - void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); } - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem *addr) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01d..6b1eca53e36c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -229,18 +195,6 @@ again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. 
- */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -345,6 +299,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 63702c0badb9..9f3e133b57b7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -555,7 +555,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, } printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", - level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + level, evt->cpu, sevstr, in_guest ? "Guest" : "", err_type, subtype, dar_str, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); @@ -577,7 +577,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. */ - if (evt->error_type == MCE_ERROR_TYPE_SLB) + if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); #endif } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b7e173754a2e..667104d4c455 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,6 +62,20 @@ out: return pfn; } +static bool mce_in_guest(void) +{ +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest context or low level KVM + * code, avoid looking up any translations or making any attempts + * to recover, just record the event and pass to KVM. + */ + if (get_paca()->kvm_hstate.in_guest) + return true; +#endif + return false; +} + /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void) @@ -69,14 +83,6 @@ void flush_and_reload_slb(void) /* Invalidate all SLBs */ slb_flush_all_realmode(); -#ifdef CONFIG_KVM_BOOK3S_HANDLER - /* - * If machine check is hit when in guest or in transition, we will - * only flush the SLBs and continue. 
- */ - if (get_paca()->kvm_hstate.in_guest) - return; -#endif if (early_radix_enabled()) return; @@ -91,7 +97,7 @@ void flush_and_reload_slb(void) } #endif -static void flush_erat(void) +void flush_erat(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { @@ -490,19 +496,21 @@ static int mce_handle_ierror(struct pt_regs *regs, if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - handled = mce_flush(MCE_FLUSH_SLB); - break; - case MCE_ERROR_TYPE_ERAT: - handled = mce_flush(MCE_FLUSH_ERAT); - break; - case MCE_ERROR_TYPE_TLB: - handled = mce_flush(MCE_FLUSH_TLB); - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } } /* now fill in mce_error_info */ @@ -534,7 +542,7 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; - if (table[i].nip_valid) { + if (table[i].nip_valid && !mce_in_guest()) { *addr = regs->nip; if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { @@ -577,22 +585,24 @@ static int mce_handle_derror(struct pt_regs *regs, if (!(dsisr & table[i].dsisr_value)) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - if (mce_flush(MCE_FLUSH_SLB)) - handled = 1; - break; - case MCE_ERROR_TYPE_ERAT: - if (mce_flush(MCE_FLUSH_ERAT)) - handled = 1; - break; - case MCE_ERROR_TYPE_TLB: - if (mce_flush(MCE_FLUSH_TLB)) - handled = 1; - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; + } } /* @@ -634,7 +644,7 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->sync_error && + else if (mce_err->sync_error && !mce_in_guest() && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -662,7 +672,8 @@ static int mce_handle_derror(struct pt_regs *regs, static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { - long handled = 0; + if (mce_in_guest()) + return 0; mce_common_process_ue(regs, mce_err); if (mce_err->ignore_event) @@ -677,9 +688,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, if (ppc_md.mce_check_early_recovery) { if (ppc_md.mce_check_early_recovery(regs)) - handled = 1; + return 1; } - return handled; + + return 0; } static long mce_handle_error(struct pt_regs *regs, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ad15768d762..7f5aae3c387d 100644 --- 
a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -241,7 +241,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..2b555997b295 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +struct pci_intx_virq { + int virq; + struct kref kref; + struct list_head list_node; +}; + +static LIST_HEAD(intx_list); +static DEFINE_MUTEX(intx_mutex); + +static void ppc_pci_intx_release(struct kref *kref) +{ + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref); + + list_del(&vi->list_node); + irq_dispose_mapping(vi->virq); + kfree(vi); +} + +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_DEL_DEVICE) { + struct pci_intx_virq *vi; + + mutex_lock(&intx_mutex); + list_for_each_entry(vi, &intx_list, list_node) { + if (vi->virq == pdev->irq) { + kref_put(&vi->kref, ppc_pci_intx_release); + break; + } + } + mutex_unlock(&intx_mutex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block ppc_pci_unmap_irq_notifier = { + .notifier_call = ppc_pci_unmap_irq_line, +}; + +static int ppc_pci_register_irq_notifier(void) +{ + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier); +} +arch_initcall(ppc_pci_register_irq_notifier); + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) static int pci_read_irq_line(struct pci_dev *pci_dev) { int virq; + struct pci_intx_virq *vi, *vitmp; + + /* Preallocate vi as rewind is complex if this fails after mapping */ + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL); + if (!vi) + return -1; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) * function. */ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; + goto error_exit; if (pin == 0) - return -1; + goto error_exit; if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || line == 0xff || line == 0) { - return -1; + goto error_exit; } pr_debug(" No map ! 
Using line %d (pin %d) from PCI config\n", line, pin); @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) if (!virq) { pr_debug(" Failed to map !\n"); - return -1; + goto error_exit; } pr_debug(" Mapped to linux irq %d\n", virq); pci_dev->irq = virq; + mutex_lock(&intx_mutex); + list_for_each_entry(vitmp, &intx_list, list_node) { + if (vitmp->virq == virq) { + kref_get(&vitmp->kref); + kfree(vi); + vi = NULL; + break; + } + } + if (vi) { + vi->virq = virq; + kref_init(&vi->kref); + list_add_tail(&vi->list_node, &intx_list); + } + mutex_unlock(&intx_mutex); + return 0; +error_exit: + kfree(vi); + return -1; } /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..a66f435dabbf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -589,7 +589,6 @@ static void save_all(struct task_struct *tsk) __giveup_spe(tsk); msr_check_and_clear(msr_all_available); - thread_pkey_regs_save(&tsk->thread); } void flush_all_to_thread(struct task_struct *tsk) @@ -807,29 +806,6 @@ static void switch_hw_breakpoint(struct task_struct *new) #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DAC1, dabr); - if (IS_ENABLED(CONFIG_PPC_47x)) - isync(); - return 0; -} -#elif defined(CONFIG_PPC_BOOK3S) -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DABR, dabr); - if (cpu_has_feature(CPU_FTR_DABRX)) - mtspr(SPRN_DABRX, dabrx); - return 0; -} -#else -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - return -EINVAL; -} -#endif - static inline int set_dabr(struct arch_hw_breakpoint *brk) { unsigned long dabr, dabrx; @@ -840,7 +816,19 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr, dabrx); - return __set_dabr(dabr, dabrx); + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + mtspr(SPRN_DAC1, dabr); + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); + return 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) { + mtspr(SPRN_DABR, dabr); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); + return 0; + } else { + return -EINVAL; + } } static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) @@ -1160,8 +1148,6 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif - - thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1202,7 +1188,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TIDR, new_thread->tidr); #endif - thread_pkey_regs_restore(new_thread, old_thread); } struct task_struct *__switch_to(struct task_struct *prev, @@ -1466,12 +1451,10 @@ static void print_msr_bits(unsigned long val) #define LAST_VOLATILE 12 #endif -void show_regs(struct pt_regs * regs) +static void __show_regs(struct pt_regs *regs) { int i, trap; - show_regs_print_info(KERN_DEFAULT); - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", @@ -1513,6 +1496,12 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); } +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + __show_regs(regs); show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if 
(!user_mode(regs)) show_instructions(regs); @@ -1527,14 +1516,27 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void) { - if (radix_enabled()) - return; - hash__setup_new_exec(); -} + +#ifdef CONFIG_PPC_BOOK3S_64 + if (!radix_enabled()) + hash__setup_new_exec(); #endif + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; + } + +#ifdef CONFIG_PPC_MEM_KEYS + current->thread.regs->amr = default_amr; + current->thread.regs->iamr = default_iamr; +#endif +} #ifdef CONFIG_PPC64 /** @@ -1730,7 +1732,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ptrace_bps[i] = NULL; #endif +#ifdef CONFIG_PPC_FPU_REGS p->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif @@ -1747,6 +1751,16 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.tidr = 0; #endif + /* + * Run with the current AMR value of the kernel + */ +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + kregs->amr = AMR_KUAP_BLOCKED; + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + kregs->iamr = AMR_KUEP_BLOCKED; +#endif kregs->nip = ppc_function_entry(f); return 0; } @@ -1765,15 +1779,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) preload_new_slb_context(start, sp); #endif - /* - * If we exec out of a kernel thread then thread.regs will not be - * set. Do it now. - */ - if (!current->thread.regs) { - struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; - current->thread.regs = regs - 1; - } - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Clear any transactional state, we're exec()ing. 
The cause is @@ -1855,8 +1860,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif current->thread.load_slb = 0; current->thread.load_fp = 0; +#ifdef CONFIG_PPC_FPU_REGS memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ @@ -1878,7 +1885,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); @@ -2174,10 +2180,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); + lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", - loglvl, regs->trap, - (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n", + loglvl, regs->trap, (void *)regs->nip); + __show_regs(regs); + printk("%s--- interrupt: %lx\n", + loglvl, regs->trap); + firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c1545f22c077..ae3c41730367 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -165,7 +165,6 @@ static struct ibm_pa_feature { #ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif - { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index c2f2402ebc8c..8ebc11d1168d 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,10 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-y += ptrace-novsx.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 67447a6197eb..3487f2c9735c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -159,8 +159,29 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-view */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data); +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); + extern const struct user_regset_view user_ppc_native_view; +/* ptrace-fpu */ +#ifdef CONFIG_PPC_FPU_REGS +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); +#else +static inline int +ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + return -EIO; +} + +static inline int +ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + return -EIO; +} +#endif + /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, 
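The declarations above pair with the new ptrace-fpu.c file below and the simplified arch_ptrace() later in this patch: a PTRACE_PEEKUSR/POKEUSR address is divided by sizeof(long) to obtain a register index, indices below PT_FPR0 go through ptrace_get_reg()/ptrace_put_reg(), and the remaining indices are handed to ptrace_get_fpr()/ptrace_put_fpr() (which return -EIO when the kernel is built without FP register state). A rough sketch of that classification; the PT_FPR0/PT_FPSCR values are stubbed in for illustration rather than taken from the powerpc uapi headers:

#include <stdio.h>

/* Placeholder values; the real ones come from arch/powerpc uapi ptrace.h. */
enum { PT_FPR0 = 48, PT_FPSCR = 48 + 32 + 1 };

/* Returns 0 for a GPR-area index, 1 for an FPR/FPSCR index, -1 for a bad address. */
static int classify_peekusr(unsigned long addr)
{
	unsigned long index;

	if (addr & (sizeof(long) - 1))	/* must be word-aligned, as in arch_ptrace() */
		return -1;
	index = addr / sizeof(long);
	if (index < PT_FPR0)
		return 0;		/* ptrace_get_reg() / ptrace_put_reg() */
	if (index <= PT_FPSCR)
		return 1;		/* ptrace_get_fpr() / ptrace_put_fpr() */
	return -1;
}

int main(void)
{
	/* Prints "0 1": address 0 is a GPR-area slot, PT_FPR0's slot is an FPR. */
	printf("%d %d\n", classify_peekusr(0),
	       classify_peekusr(PT_FPR0 * sizeof(long)));
	return 0;
}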
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c new file mode 100644 index 000000000000..8301cb52dd99 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/regset.h> + +#include <asm/switch_to.h> + +#include "ptrace-decl.h" + +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); + else + *data = child->thread.fp_state.fpscr; + + return 0; +} + +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); + else + child->thread.fp_state.fpscr = data; + + return 0; +} + diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 54f2d076206f..44045363a903 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -86,6 +86,11 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif + if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -96,16 +101,12 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - membuf_write(&to, &target->thread.ckpt_regs, - offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_ckpt_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_ckpt_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 7e6478e7ed07..2bad8068f598 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -217,6 +217,10 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) static int gpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif int i; if (target->thread.regs == NULL) @@ -228,15 +232,12 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + 
membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.regs->orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } @@ -470,12 +471,12 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); if (!arch_pkeys_enabled()) return -ENODEV; - membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + membuf_store(&to, target->thread.regs->amr); + membuf_store(&to, target->thread.regs->iamr); return membuf_store(&to, default_uamor); } @@ -508,7 +509,8 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset * Pick the AMR values for the keys that kernel is using. This * will be indicated by the ~default_uamor bits. */ - target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); + target->thread.regs->amr = (new_amr & default_uamor) | + (target->thread.regs->amr & ~default_uamor); return 0; } @@ -520,11 +522,13 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, +#ifdef CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, +#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index f6e51be47c6e..3d44b73adb83 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -55,31 +55,18 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); - else - tmp = child->thread.fp_state.fpscr; - } + else + ret = ptrace_get_fpr(child, index, &tmp); + + if (ret) + break; ret = put_user(tmp, datalp); break; } @@ -90,30 +77,15 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - 
memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); - else - child->thread.fp_state.fpscr = data; - ret = 0; - } + else + ret = ptrace_put_fpr(child, index, data); break; } diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7589a9665ffb..d30b9ad70edc 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -23,6 +23,8 @@ #include <asm/switch_to.h> +#include "ptrace-decl.h" + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 954f41676f69..d126d71ea5bd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -684,6 +684,63 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } +/** + * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR. + * + * @fw_status: RTAS call status will be placed here if not NULL. + * + * rtas_ibm_suspend_me() should be called only on a CPU which has + * received H_CONTINUE from the H_JOIN hcall. All other active CPUs + * should be waiting to return from H_JOIN. + * + * rtas_ibm_suspend_me() may suspend execution of the OS + * indefinitely. Callers should take appropriate measures upon return, such as + * resetting watchdog facilities. + * + * Callers may choose to retry this call if @fw_status is + * %RTAS_THREADS_ACTIVE. + * + * Return: + * 0 - The partition has resumed from suspend, possibly after + * migration to a different host. + * -ECANCELED - The operation was aborted. + * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call. + * -EBUSY - Some other condition prevented the suspend from succeeding. + * -EIO - Hardware/platform error. + */ +int rtas_ibm_suspend_me(int *fw_status) +{ + int fwrc; + int ret; + + fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + + switch (fwrc) { + case 0: + ret = 0; + break; + case RTAS_SUSPEND_ABORTED: + ret = -ECANCELED; + break; + case RTAS_THREADS_ACTIVE: + ret = -EAGAIN; + break; + case RTAS_NOT_SUSPENDABLE: + case RTAS_OUTSTANDING_COPROC: + ret = -EBUSY; + break; + case -1: + default: + ret = -EIO; + break; + } + + if (fw_status) + *fw_status = fwrc; + + return ret; +} + void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) @@ -741,163 +798,38 @@ void rtas_os_term(char *str) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; -#ifdef CONFIG_PPC_PSERIES -static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - u16 slb_size = mmu_slb_size; - int rc = H_MULTI_THREADS_ACTIVE; - int cpu; - - slb_set_size(SLB_MIN_SIZE); - printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); - - while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && - !atomic_read(&data->error)) - rc = rtas_call(data->token, 0, 1, NULL); - - if (rc || atomic_read(&data->error)) { - printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); - slb_set_size(slb_size); - } - - if (atomic_read(&data->error)) - rc = atomic_read(&data->error); - - atomic_set(&data->error, rc); - pSeries_coalesce_init(); - - if (wake_when_done) { - atomic_set(&data->done, 1); - - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } - - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - - return rc; -} - -int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) -{ - 
atomic_inc(&data->working); - return __rtas_suspend_last_cpu(data, 0); -} - -static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - long rc = H_SUCCESS; - unsigned long msr_save; - int cpu; - - atomic_inc(&data->working); - - /* really need to ensure MSR.EE is off for H_JOIN */ - msr_save = mfmsr(); - mtmsr(msr_save & ~(MSR_EE)); - - while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) - rc = plpar_hcall_norets(H_JOIN); - - mtmsr(msr_save); - - if (rc == H_SUCCESS) { - /* This cpu was prodded and the suspend is complete. */ - goto out; - } else if (rc == H_CONTINUE) { - /* All other cpus are in H_JOIN, this cpu does - * the suspend. - */ - return __rtas_suspend_last_cpu(data, wake_when_done); - } else { - printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", - smp_processor_id(), rc); - atomic_set(&data->error, rc); - } - - if (wake_when_done) { - atomic_set(&data->done, 1); - - /* This cpu did the suspend or got an error; in either case, - * we need to prod all other other cpus out of join state. - * Extra prods are harmless. - */ - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } -out: - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - return rc; -} - -int rtas_suspend_cpu(struct rtas_suspend_me_data *data) -{ - return __rtas_suspend_cpu(data, 0); -} - -static void rtas_percpu_suspend_me(void *info) +/** + * rtas_activate_firmware() - Activate a new version of firmware. + * + * Activate a new version of partition firmware. The OS must call this + * after resuming from a partition hibernation or migration in order + * to maintain the ability to perform live firmware updates. It's not + * catastrophic for this method to be absent or to fail; just log the + * condition in that case. + * + * Context: This function may sleep. + */ +void rtas_activate_firmware(void) { - __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); -} + int token; + int fwrc; -int rtas_ibm_suspend_me(u64 handle) -{ - long state; - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - struct rtas_suspend_me_data data; - DECLARE_COMPLETION_ONSTACK(done); - - if (!rtas_service_present("ibm,suspend-me")) - return -ENOSYS; - - /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, handle); - - state = retbuf[0]; - - if (rc) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); - return rc; - } else if (state == H_VASI_ENABLED) { - return -EAGAIN; - } else if (state != H_VASI_SUSPENDING) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", - state); - return -EIO; + token = rtas_token("ibm,activate-firmware"); + if (token == RTAS_UNKNOWN_SERVICE) { + pr_notice("ibm,activate-firmware method unavailable\n"); + return; } - atomic_set(&data.working, 0); - atomic_set(&data.done, 0); - atomic_set(&data.error, 0); - data.token = rtas_token("ibm,suspend-me"); - data.complete = &done; - - lock_device_hotplug(); - - cpu_hotplug_disable(); - - /* Call function on all CPUs. 
One of us will make the - * rtas call - */ - on_each_cpu(rtas_percpu_suspend_me, &data, 0); - - wait_for_completion(&done); - - if (atomic_read(&data.error) != 0) - printk(KERN_ERR "Error doing global join\n"); - - - cpu_hotplug_enable(); - - unlock_device_hotplug(); + do { + fwrc = rtas_call(token, 0, 1, NULL); + } while (rtas_busy_delay(fwrc)); - return atomic_read(&data.error); + if (fwrc) + pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } +static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; +#ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call @@ -948,12 +880,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) return ret; } -#else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle) -{ - return -ENOSYS; -} -#endif +#endif /* CONFIG_PPC_PSERIES */ /** * Find a specific pseries error log in an RTAS extended event log. @@ -1030,7 +957,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "ibm,display-message", -1, 0, -1, -1, -1 }, { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, { "ibm,get-indices", -1, 2, 3, -1, -1 }, @@ -1050,9 +977,11 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "set-time-for-power-on", -1, -1, -1, -1, -1 }, { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, { "set-time-of-day", -1, -1, -1, -1, -1 }, +#ifdef CONFIG_CPU_BIG_ENDIAN { "ibm,suspend-me", -1, -1, -1, -1, -1 }, { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, +#endif { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, }; @@ -1183,7 +1112,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle); + rc = rtas_syscall_dispatch_ibm_suspend_me(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 808ec9fab605..71f38e9248be 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -90,8 +90,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); */ int dcache_bsize; int icache_bsize; -int ucache_bsize; - unsigned long klimit = (unsigned long) _end; @@ -802,8 +800,6 @@ static __init void print_system_info(void) pr_info("dcache_bsize = 0x%x\n", dcache_bsize); pr_info("icache_bsize = 0x%x\n", icache_bsize); - if (ucache_bsize != 0) - pr_info("ucache_bsize = 0x%x\n", ucache_bsize); pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); pr_info(" possible = 0x%016lx\n", @@ -919,8 +915,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. 
*/ @@ -944,6 +938,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2ec835574cc9..2dd0d9cb5a20 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,12 +8,6 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H -#ifdef CONFIG_CC_IS_CLANG -#define __nostackprotector -#else -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) -#endif - void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 057d6b8e9bb0..8ba49a6bf515 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -222,7 +222,4 @@ __init void initialize_cache_info(void) */ dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - if (IS_ENABLED(CONFIG_E200)) - ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 74fd47f46fa5..c28e949cc222 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -283,7 +283,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init __nostackprotector early_setup(unsigned long dt_ptr) +void __init early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a8bb0aca1d02..53782aa60ade 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -133,36 +133,6 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task, return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#else -inline unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.fp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.fp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.ckfp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.ckfp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* Log an error when sending an unhandled signal to a process. 
Controlled @@ -174,20 +144,22 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32) +static unsigned long get_tm_stackpointer(struct task_struct *tsk); + +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32) { unsigned long oldsp, newsp; + unsigned long sp = get_tm_stackpointer(tsk); /* Default to using normal stack */ - oldsp = get_clean_sp(sp, is_32); + if (is_32) + oldsp = sp & 0x0ffffffffUL; + else + oldsp = sp; oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; - /* Check access */ - if (!access_ok((void __user *)newsp, oldsp - newsp)) - return NULL; - return (void __user *)newsp; } @@ -331,7 +303,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct task_struct *tsk) +static unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. It's possible that the stack has moved back @@ -379,3 +351,14 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) #endif return ret; } + +static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n"; +static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n"; + +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr) +{ + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm, + task_pid_nr(tsk), where, ptr, regs->nip, regs->link); +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index d396efca4068..2559a681536e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -10,8 +10,8 @@ #ifndef _POWERPC_ARCH_SIGNAL_H #define _POWERPC_ARCH_SIGNAL_H -extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32); +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32); extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); @@ -19,16 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); -extern unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long get_tm_stackpointer(struct task_struct *tsk); - #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); @@ -38,6 +28,104 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); +unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); + +#define unsafe_copy_fpr_to_user(to, task, 
label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_ckvsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) +#endif +#elif defined(CONFIG_PPC_FPU_REGS) + +#define unsafe_copy_fpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.fp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.ckfp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_ckfpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.ckfp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return 0; +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 @@ -58,4 +146,7 @@ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif /* !defined(CONFIG_PPC64) */ +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr); + #endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 96950f189b5a..934cbdf6dd10 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -58,8 +58,6 @@ #define mcontext mcontext32 #define ucontext ucontext32 -#define __save_altstack __compat_save_altstack - /* * Userspace code may pass a ucontext which doesn't include VSX added * at the end. We need to check for this case. 
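The unsafe_copy_fpr_to_user()/unsafe_copy_vsx_to_user() macros added to signal.h above, and the save_general_regs_unsafe()/save_user_regs_unsafe() conversions in the signal_32.c hunks that follow, all rely on the kernel's goto-on-fault user access pattern: the caller opens a single user_access_begin() window and every unsafe_put_user()/unsafe_copy_to_user() inside it jumps to the supplied label on a fault instead of returning an error per call. A condensed sketch of that pattern, separate from the patch (the function name and buffer layout are made up for illustration):

#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Copy n 64-bit registers to userspace with one fault label for the
 * whole sequence, mirroring the failed: convention used above. */
static int save_regs_to_user(u64 __user *buf, const u64 *regs, int n)
{
	int i;

	if (!user_access_begin(buf, n * sizeof(u64)))
		return -EFAULT;

	for (i = 0; i < n; i++)
		unsafe_put_user(regs[i], &buf[i], failed);

	user_access_end();
	return 0;

failed:
	user_access_end();
	return -EFAULT;
}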
@@ -84,10 +82,7 @@ * Functions for flipping sigsets (thanks to brain dead generic * implementation that makes things simple for little endian only) */ -static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) -{ - return put_compat_sigset(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t unsafe_put_compat_sigset static inline int get_sigset_t(sigset_t *set, const compat_sigset_t __user *uset) @@ -98,8 +93,8 @@ static inline int get_sigset_t(sigset_t *set, #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; @@ -113,10 +108,12 @@ static inline int save_general_regs(struct pt_regs *regs, else val = gregs[i]; - if (__put_user(val, &frame->mc_gregs[i])) - return -EFAULT; + unsafe_put_user(val, &frame->mc_gregs[i], failed); } return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -138,10 +135,12 @@ static inline int restore_general_regs(struct pt_regs *regs, #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) -static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) -{ - return copy_to_user(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t(uset, set, label) do { \ + sigset_t __user *__us = uset ; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ +} while (0) static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) { @@ -151,11 +150,15 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { WARN_ON(!FULL_REGS(regs)); - return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); + unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); + return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -173,6 +176,11 @@ static inline int restore_general_regs(struct pt_regs *regs, } #endif +#define unsafe_save_general_regs(regs, frame, label) do { \ + if (save_general_regs_unsafe(regs, frame)) \ + goto label; \ +} while (0) + /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -199,9 +207,6 @@ struct sigframe { int abigap[56]; }; -/* We use the mc_pad field for the signal return trampoline. */ -#define tramp mc_pad - /* * When we have rt signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -235,26 +240,39 @@ struct rt_sigframe { * We only save the altivec/spe registers if the process has used * altivec/spe instructions at some point. 
*/ -static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - int ctx_has_vsx_region) +static void prepare_save_user_regs(int ctx_has_vsx_region) { - unsigned long msr = regs->msr; - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); +#ifdef CONFIG_ALTIVEC + if (current->thread.used_vr) + flush_altivec_to_thread(current); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_VSX + if (current->thread.used_vsr && ctx_has_vsx_region) + flush_vsx_to_thread(current); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) +{ + unsigned long msr = regs->msr; /* save general registers */ - if (save_general_regs(regs, frame)) - return 1; + unsafe_save_general_regs(regs, frame, failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_VEC; @@ -267,13 +285,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * most significant bits of that same vector. --BenH * Note that the current VRSAVE value is in the SPR at this point. */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) - return 1; + unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32], + failed); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) - return 1; + unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -288,19 +303,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); - if (copy_vsx_to_user(&frame->mc_vsregs, current)) - return 1; + unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_SPE; @@ -308,30 +319,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, /* else assert((regs->msr & MSR_SPE) == 0) */ /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); + /* We need to write 0 the 
MSR top 32 bits in the tm frame so that we * can check it on the restore to see if TM is active */ - if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) - return 1; - - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + if (tm_frame) + unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; } +#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ + if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + goto label; \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save the current user registers on the user stack. @@ -340,19 +350,28 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). */ -static int save_tm_user_regs(struct pt_regs *regs, - struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - unsigned long msr) +static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ /* Save both sets of general registers */ - if (save_general_regs(&current->thread.ckpt_regs, frame) - || save_general_regs(regs, tm_frame)) - return 1; + unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed); + unsafe_save_general_regs(regs, tm_frame, failed); /* Stash the top half of the 64bit MSR into the 32bit MSR word * of the transactional mcontext. This way we have a backward-compatible @@ -360,26 +379,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * also look at what type of transaction (T or S) was active at the * time of the signal. */ - if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - if (__copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - if (msr & MSR_VEC) { - if (__copy_to_user(&tm_frame->mc_vregs, - &current->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } else { - if (__copy_to_user(&tm_frame->mc_vregs, - &current->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } + unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); + if (msr & MSR_VEC) + unsafe_copy_to_user(&tm_frame->mc_vregs, + &current->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); + else + unsafe_copy_to_user(&tm_frame->mc_vregs, + &current->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that * frame->mc_vregs contains valid data @@ -392,31 +406,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector.
--BenH */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.ckvrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&frame->mc_vregs[32])) - return 1; - if (msr & MSR_VEC) { - if (__put_user(current->thread.vrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } else { - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&frame->mc_vregs[32], failed); + if (msr & MSR_VEC) + unsafe_put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); + else + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); #endif /* CONFIG_ALTIVEC */ - if (copy_ckfpr_to_user(&frame->mc_fregs, current)) - return 1; - if (msr & MSR_FP) { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } else { - if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } + unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); + if (msr & MSR_FP) + unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed); + else + unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); #ifdef CONFIG_VSX /* @@ -426,54 +430,54 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) - return 1; - if (msr & MSR_VSX) { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, - current)) - return 1; - } else { - if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) - return 1; - } + unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed); + if (msr & MSR_VSX) + unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed); + else + unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs(). + * simply the same as in save_user_regs_unsafe(). 
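The signal_32.c conversions above all follow one shape: the caller opens a single user write window, each unsafe_* helper branches to a local failed label instead of returning when a store faults, and the window is closed on both the success and the failure path. A minimal sketch of that shape, assuming a made-up example_frame layout in place of the real mcontext:

/* Sketch only: 'struct example_frame' and its fields are invented;
 * the uaccess helpers are the kernel ones used in the hunks above. */
struct example_frame {
	unsigned long msr;
	unsigned long gprs[32];
};

static int example_save_unsafe(struct example_frame __user *frame,
			       struct pt_regs *regs, unsigned long msr)
{
	/* one access window for the whole frame */
	if (!user_write_access_begin(frame, sizeof(*frame)))
		return 1;

	/* each helper jumps to 'failed' if the user store faults */
	unsafe_put_user(msr, &frame->msr, failed);
	unsafe_copy_to_user(frame->gprs, regs->gpr, sizeof(frame->gprs), failed);

	user_write_access_end();
	return 0;

failed:
	user_write_access_end();
	return 1;
}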
*/ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that * frame->mc_vregs contains valid data */ msr |= MSR_SPE; } /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; +} +#else +static void prepare_save_tm_user_regs(void) { } + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ + return 0; } #endif +#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \ + if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \ + goto label; \ +} while (0) + /* * Restore the current user register values from the user stack, * (except for MSR). @@ -751,96 +755,189 @@ static long restore_tm_user_regs(struct pt_regs *regs, int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk) { - struct rt_sigframe __user *rt_sf; - struct mcontext __user *frame; - struct mcontext __user *tm_frame = NULL; - void __user *addr; + struct rt_sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; - int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); /* Set up Signal Frame */ - /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); - addr = rt_sf; - if (unlikely(rt_sf == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->uc.uc_mcontext; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->uc_transact.uc_mcontext; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) - || __put_user(0, &rt_sf->uc.uc_flags) - || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) - || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), - &rt_sf->uc.uc_regs) - || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) - goto badframe; + unsafe_put_user(0, &frame->uc.uc_flags, failed); +#ifdef CONFIG_PPC64 + unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#else + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#endif + unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed); - /* Save user registers on the stack */ - frame = &rt_sf->uc.uc_mcontext; - addr = frame; - if 
(vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user((unsigned long)&frame->uc_transact, + &frame->uc.uc_link, failed); + unsafe_put_user((unsigned long)tm_mctx, + &frame->uc_transact.uc_regs, failed); +#endif + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); } else { - sigret = __NR_rt_sigreturn; - tramp = (unsigned long) frame->tramp; + unsafe_put_user(0, &frame->uc.uc_link, failed); + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(msr)) { - if (__put_user((unsigned long)&rt_sf->uc_transact, - &rt_sf->uc.uc_link) || - __put_user((unsigned long)tm_frame, - &rt_sf->uc_transact.uc_regs)) - goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) - goto badframe; - } - else -#endif - { - if (__put_user(0, &rt_sf->uc.uc_link)) - goto badframe; - if (save_user_regs(regs, frame, tm_frame, sigret, 1)) - goto badframe; + /* Save user registers on the stack */ + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], + failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); + + user_write_access_end(); + + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); - addr = (void __user *)regs->gpr[1]; + newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) &rt_sf->info; - regs->gpr[5] = (unsigned long) &rt_sf->uc; - regs->gpr[6] = (unsigned long) rt_sf; + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; + regs->gpr[6] = (unsigned long)frame; regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); return 0; +failed: + user_write_access_end(); + +badframe: + signal_fault(tsk, regs, "handle_rt_signal32", frame); + + return 1; +} + +/* + * OK, we're invoking a handler + */ +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) +{ + struct sigcontext __user *sc; + struct sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; + unsigned long newsp = 0; + unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; + + /* Set up Signal Frame */ + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else 
+ prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) + goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; + +#if _NSIG != 64 +#error "Please adjust handle_signal()" +#endif + unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed); + unsafe_put_user(oldset->sig[0], &sc->oldmask, failed); +#ifdef CONFIG_PPC64 + unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed); +#else + unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed); +#endif + unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed); + unsafe_put_user(ksig->sig, &sc->signal, failed); + + if (MSR_TM_ACTIVE(msr)) + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); + else + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); + + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + } + user_write_access_end(); + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + + regs->link = tramp; + +#ifdef CONFIG_PPC_FPU_REGS + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[3] = ksig->sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + return 0; + +failed: + user_write_access_end(); + badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_rt_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - addr, regs->nip, regs->link); + signal_fault(tsk, regs, "handle_signal32", frame); return 1; } @@ -967,11 +1064,13 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); - if (!access_ok(old_ctx, ctx_size) - || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) - || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) - || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) + prepare_save_user_regs(ctx_has_vsx_region); + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed); + unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed); + unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed); + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -995,6 +1094,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; + +failed: + user_write_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC64 @@ -1092,12 +1195,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; bad: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_rt_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - rt_sf, regs->nip, regs->link); + signal_fault(current, regs, "sys_rt_sigreturn", rt_sf); force_sig(SIGSEGV); return 0; @@ -1181,12 +1279,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, * We kill the task with a SIGSEGV in this
situation. */ if (do_setcontext(ctx, regs, 1)) { - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " - "sys_debug_setcontext: %p nip %08lx " - "lr %08lx\n", - current->comm, current->pid, - ctx, regs->nip, regs->link); + signal_fault(current, regs, "sys_debug_setcontext", ctx); force_sig(SIGSEGV); goto out; @@ -1208,96 +1301,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, #endif /* - * OK, we're invoking a handler - */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct task_struct *tsk) -{ - struct sigcontext __user *sc; - struct sigframe __user *frame; - struct mcontext __user *tm_mctx = NULL; - unsigned long newsp = 0; - int sigret; - unsigned long tramp; - struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Save the thread's msr before get_tm_stackpointer() changes it */ - unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); - - /* Set up Signal Frame */ - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); - if (unlikely(frame == NULL)) - goto badframe; - sc = (struct sigcontext __user *) &frame->sctx; - -#if _NSIG != 64 -#error "Please adjust handle_signal()" -#endif - if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) - || __put_user(oldset->sig[0], &sc->oldmask) -#ifdef CONFIG_PPC64 - || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) -#else - || __put_user(oldset->sig[1], &sc->_unused[3]) -#endif - || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(ksig->sig, &sc->signal)) - goto badframe; - - if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; - } else { - sigret = __NR_sigreturn; - tramp = (unsigned long) frame->mctx.tramp; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret, msr)) - goto badframe; - } - else -#endif - { - if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) - goto badframe; - } - - regs->link = tramp; - - tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ - - /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; - if (put_user(regs->gpr[1], (u32 __user *)newsp)) - goto badframe; - - regs->gpr[1] = newsp; - regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ - regs->msr &= ~MSR_LE; - return 0; - -badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - frame, regs->nip, regs->link); - - return 1; -} - -/* * Do a signal return; undo the signal stack. 
*/ #ifdef CONFIG_PPC64 @@ -1363,12 +1366,7 @@ SYSCALL_DEFINE0(sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + signal_fault(current, regs, "sys_sigreturn", addr); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bfc939360bad..f9e4a1ac440f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -66,11 +66,6 @@ struct rt_sigframe { char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); -static const char fmt32[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n"; -static const char fmt64[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n"; - /* * This computes a quad word aligned pointer inside the vmx_reserve array * element. For historical reasons sigcontext might not be quad word aligned, @@ -801,10 +796,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "rt_sigreturn", - (long)uc, regs->nip, regs->link); + signal_fault(current, regs, "rt_sigreturn", uc); force_sig(SIGSEGV); return 0; @@ -822,10 +814,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); - if (unlikely(frame == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); + if (!access_ok(frame, sizeof(*frame))) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); @@ -864,8 +854,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + if (tsk->mm->context.vdso) { + regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) @@ -913,10 +903,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? 
fmt64 : fmt32, - tsk->comm, tsk->pid, "setup_rt_frame", - (long)frame, regs->nip, regs->link); + signal_fault(current, regs, "handle_rt_signal64", frame); return 1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd960..2b9b1bb4c5f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; +bool thread_group_shares_l2; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -99,6 +100,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 +#define THREAD_GROUP_SHARE_L2 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -106,11 +108,27 @@ struct thread_groups { unsigned int thread_list[MAX_THREAD_LIST_SIZE]; }; +/* Maximum number of properties that groups of threads within a core can share */ +#define MAX_THREAD_GROUP_PROPERTIES 2 + +struct thread_groups_list { + unsigned int nr_properties; + struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES]; +}; + +static struct thread_groups_list tgl[NR_CPUS] __initdata; /* - * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); + +/* + * On some big-cores system, thread_group_l2_cache_map for each CPU + * corresponds to the set its siblings within the core that share the + * L2-cache. + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -695,81 +713,100 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores - * the parsed output in the thread_groups - * structure @tg if the ibm,thread-groups[0] - * matches @property. + * the parsed output in the thread_groups_list + * structure @tglp. * * @dn: The device node of the CPU device. - * @tg: Pointer to a thread group structure into which the parsed + * @tglp: Pointer to a thread group list structure into which the parsed * output of "ibm,thread-groups" is stored. - * @property: The property of the thread-group that the caller is - * interested in. * * ibm,thread-groups[0..N-1] array defines which group of threads in * the CPU-device node can be grouped together based on the property. * - * ibm,thread-groups[0] tells us the property based on which the + * This array can represent thread groupings for multiple properties. + * + * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies - * that the threads in the same group share L1, translation cache. + * that the threads in the same group share L1, translation cache. If + * the value is 2, it implies that the threads in the same group share + * the same L2 cache. * - * ibm,thread-groups[1] tells us how many such thread groups exist. + * ibm,thread-groups[i+1] tells us how many such thread groups exist for the + * property ibm,thread-groups[i] * - * ibm,thread-groups[2] tells us the number of threads in each such + * ibm,thread-groups[i+2] tells us the number of threads in each such * group. 
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then, * - * ibm,thread-groups[3..N-1] is the list of threads identified by + * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by * "ibm,ppc-interrupt-server#s" arranged as per their membership in * the grouping. * - * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it - * implies that there are 2 groups of 4 threads each, where each group - * of threads share L1, translation cache. + * Example: + * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] + * This can be decomposed up into two consecutive arrays: + * a) [1,2,4,8,10,12,14,9,11,13,15] + * b) [2,2,4,8,10,12,14,9,11,13,15] + * + * where in, + * + * a) provides information of Property "1" being shared by "2" groups, + * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "1" is indicative of the thread in the + * group sharing L1 cache, translation cache and Instruction Data + * flow. * - * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} - * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, - * 11, 12} structure + * b) provides information of Property "2" being shared by "2" groups, + * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "2" indicates that the threads in each + * group share the L2-cache. * * Returns 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. 
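As a standalone illustration of the stride this comment describes, the snippet below walks a flattened "ibm,thread-groups" value in the same way parse_thread_groups() does. It is plain userspace C, not kernel code, and the array contents are copied from the example in the comment:

#include <stdio.h>

int main(void)
{
	/* example "ibm,thread-groups" value from the comment above */
	unsigned int tg[] = { 1, 2, 4, 8, 10, 12, 14, 9, 11, 13, 15,
			      2, 2, 4, 8, 10, 12, 14, 9, 11, 13, 15 };
	unsigned int count = sizeof(tg) / sizeof(tg[0]);
	unsigned int i = 0;

	while (i + 3 <= count) {
		unsigned int property = tg[i];
		unsigned int nr_groups = tg[i + 1];
		unsigned int per_group = tg[i + 2];

		printf("property %u: %u groups of %u threads\n",
		       property, nr_groups, per_group);
		/* skip the nr_groups * per_group thread ids that follow */
		i += 3 + nr_groups * per_group;
	}
	return 0;
}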
*/ static int parse_thread_groups(struct device_node *dn, - struct thread_groups *tg, - unsigned int property) + struct thread_groups_list *tglp) { - int i; - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; - u32 *thread_list; + unsigned int property_idx = 0; + u32 *thread_group_array; size_t total_threads; - int ret; + int ret = 0, count; + u32 *thread_list; + int i = 0; + count = of_property_count_u32_elems(dn, "ibm,thread-groups"); + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, 3); + thread_group_array, count); if (ret) - return ret; - - tg->property = thread_group_array[0]; - tg->nr_groups = thread_group_array[1]; - tg->threads_per_group = thread_group_array[2]; - if (tg->property != property || - tg->nr_groups < 1 || - tg->threads_per_group < 1) - return -ENODATA; + goto out_free; - total_threads = tg->nr_groups * tg->threads_per_group; + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) { + int j; + struct thread_groups *tg = &tglp->property_tgs[property_idx++]; - ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, - 3 + total_threads); - if (ret) - return ret; + tg->property = thread_group_array[i]; + tg->nr_groups = thread_group_array[i + 1]; + tg->threads_per_group = thread_group_array[i + 2]; + total_threads = tg->nr_groups * tg->threads_per_group; - thread_list = &thread_group_array[3]; + thread_list = &thread_group_array[i + 3]; - for (i = 0 ; i < total_threads; i++) - tg->thread_list[i] = thread_list[i]; + for (j = 0; j < total_threads; j++) + tg->thread_list[j] = thread_list[j]; + i = i + 3 + total_threads; + } - return 0; + tglp->nr_properties = property_idx; + +out_free: + kfree(thread_group_array); + return ret; } /* @@ -805,50 +842,84 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) return -1; } -static int init_cpu_l1_cache_map(int cpu) - +static struct thread_groups *__init get_thread_groups(int cpu, + int group_property, + int *err) { struct device_node *dn = of_get_cpu_node(cpu, NULL); - struct thread_groups tg = {.property = 0, - .nr_groups = 0, - .threads_per_group = 0}; + struct thread_groups_list *cpu_tgl = &tgl[cpu]; + struct thread_groups *tg = NULL; + int i; + *err = 0; + + if (!dn) { + *err = -ENODATA; + return NULL; + } + + if (!cpu_tgl->nr_properties) { + *err = parse_thread_groups(dn, cpu_tgl); + if (*err) + goto out; + } + + for (i = 0; i < cpu_tgl->nr_properties; i++) { + if (cpu_tgl->property_tgs[i].property == group_property) { + tg = &cpu_tgl->property_tgs[i]; + break; + } + } + + if (!tg) + *err = -EINVAL; +out: + of_node_put(dn); + return tg; +} + +static int __init init_thread_group_cache_map(int cpu, int cache_property) + +{ int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; + struct thread_groups *tg = NULL; + cpumask_var_t *mask = NULL; - if (!dn) - return -ENODATA; + if (cache_property != THREAD_GROUP_SHARE_L1 && + cache_property != THREAD_GROUP_SHARE_L2) + return -EINVAL; - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); - if (err) - goto out; + tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) + return err; - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + cpu_group_start = get_cpu_thread_group_start(cpu, tg); if (unlikely(cpu_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + if 
(cache_property == THREAD_GROUP_SHARE_L1) + mask = &per_cpu(thread_group_l1_cache_map, cpu); + else if (cache_property == THREAD_GROUP_SHARE_L2) + mask = &per_cpu(thread_group_l2_cache_map, cpu); + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, &tg); + int i_group_start = get_cpu_thread_group_start(i, tg); if (unlikely(i_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + cpumask_set_cpu(i, *mask); } -out: - of_node_put(dn); - return err; + return 0; } static bool shared_caches; @@ -924,7 +995,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_cpu_l1_cache_map(cpu); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1); if (err) return err; @@ -935,6 +1006,16 @@ static int init_big_cores(void) } has_big_cores = true; + + for_each_possible_cpu(cpu) { + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + + if (err) + return err; + } + + thread_group_shares_l2 = true; + pr_debug("L2 cache only shared by the threads in the small core\n"); return 0; } @@ -1249,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) if (has_big_cores) submask_fn = cpu_smallcore_mask; + /* + * If the threads in a thread-group share L2 cache, then the + * L2-mask can be obtained from thread_group_l2_cache_map. + */ + if (thread_group_shares_l2) { + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) { + if (cpu_online(i)) + set_cpus_related(i, cpu, cpu_l2_cache_mask); + } + + /* Verify that L1-cache siblings are a subset of L2 cache-siblings */ + if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) && + !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) { + pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n", + cpu); + } + + return true; + } + l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ @@ -1320,7 +1423,7 @@ static inline void add_cpu_to_smallcore_masks(int cpu) cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) { if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 310bcd768cd5..7c85ed04a164 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -35,7 +35,31 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); - kuap_check_amr(); +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. 
+ */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_check_amr(); account_cpu_user_entry(); @@ -245,6 +269,12 @@ again: account_cpu_user_exit(); +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); +#endif return ret; } @@ -330,6 +360,10 @@ again: account_cpu_user_exit(); + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); return ret; } @@ -400,7 +434,7 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ - kuap_restore_amr(regs, amr); + kuap_kernel_restore(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cf3f8db7e0e3..67feb3524460 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -82,6 +82,7 @@ static struct clocksource clocksource_timebase = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), .read = timebase_read, + .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER, }; #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF @@ -576,14 +577,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. */ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } @@ -855,95 +853,6 @@ static notrace u64 timebase_read(struct clocksource *cs) return (u64)get_tb(); } - -void update_vsyscall(struct timekeeper *tk) -{ - struct timespec64 xt; - struct clocksource *clock = tk->tkr_mono.clock; - u32 mult = tk->tkr_mono.mult; - u32 shift = tk->tkr_mono.shift; - u64 cycle_last = tk->tkr_mono.cycle_last; - u64 new_tb_to_xs, new_stamp_xsec; - u64 frac_sec; - - if (clock != &clocksource_timebase) - return; - - xt.tv_sec = tk->xtime_sec; - xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * This computes ((2^20 / 1e9) * mult) >> shift as a - * 0.64 fixed-point fraction. - * The computation in the else clause below won't overflow - * (as long as the timebase frequency is >= 1.049 MHz) - * but loses precision because we lose the low bits of the constant - * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9. - * For a shift of 24 the error is about 0.5e-9, or about 0.5ns - * over a second. (Shift values are usually 22, 23 or 24.) - * For high frequency clocks such as the 512MHz timebase clock - * on POWER[6789], the mult value is small (e.g. 32768000) - * and so we can shift the constant by 16 initially - * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the - * remaining shifts after the multiplication, which gives a - * more accurate result (e.g. 
with mult = 32768000, shift = 24, - * the error is only about 1.2e-12, or 0.7ns over 10 minutes). - */ - if (mult <= 62500000 && clock->shift >= 16) - new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16); - else - new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); - - /* - * Compute the fractional second in units of 2^-32 seconds. - * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift - * in nanoseconds, so multiplying that by 2^32 / 1e9 gives - * it in units of 2^-32 seconds. - * We assume shift <= 32 because clocks_calc_mult_shift() - * generates shift values in the range 0 - 32. - */ - frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift); - do_div(frac_sec, NSEC_PER_SEC); - - /* - * Work out new stamp_xsec value for any legacy users of systemcfg. - * stamp_xsec is in units of 2^-20 seconds. - */ - new_stamp_xsec = frac_sec >> 12; - new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC; - - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. - */ - vdso_data->tb_orig_stamp = cycle_last; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime_sec = xt.tv_sec; - vdso_data->stamp_xtime_nsec = xt.tv_nsec; - vdso_data->stamp_sec_fraction = frac_sec; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - static void __init clocksource_init(void) { struct clocksource *clock = &clocksource_timebase; @@ -1103,7 +1012,6 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; /* initialise and enable the large decrementer (if we have one) */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..3ec7b443fe6b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -347,12 +347,6 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, current->thread.trap_nr = code; - /* - * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need - * to capture the content, if the task gets killed. 
- */ - thread_pkey_regs_save(&current->thread); - return true; } @@ -757,31 +751,6 @@ int machine_check_generic(struct pt_regs *regs) { return 0; } -#elif defined(CONFIG_E200) -int machine_check_e200(struct pt_regs *regs) -{ - unsigned long reason = mfspr(SPRN_MCSR); - - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - pr_cont("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - pr_cont("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - pr_cont("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - pr_cont("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - pr_cont("Bus - Read Bus Error on data load\n"); - if (reason & MCSR_BUS_WRERR) - pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); - - return 0; -} #elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { @@ -1190,7 +1159,9 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); +#ifdef CONFIG_PPC_FPU_REGS code = __parse_fpscr(current->thread.fp_state.fpscr); +#endif _exception(SIGFPE, regs, code, regs->nip); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8dad44262e75..e839a906fdf2 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,7 +17,10 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/memblock.h> +#include <linux/syscalls.h> +#include <vdso/datapage.h> +#include <asm/syscall.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -30,39 +33,11 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -static unsigned int vdso32_pages; -static void *vdso32_kbase; -static struct page **vdso32_pagelist; -unsigned long vdso32_sigtramp; -unsigned long vdso32_rt_sigtramp; - -#ifdef CONFIG_VDSO32 extern char vdso32_start, vdso32_end; -#endif - -#ifdef CONFIG_PPC64 extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; -unsigned long vdso64_rt_sigtramp; -#endif /* CONFIG_PPC64 */ - -static int vdso_ready; /* * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
@@ -70,77 +45,63 @@ static int vdso_ready; * with it, it will become dynamically allocated */ static union { - struct vdso_data data; + struct vdso_arch_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_arch_data *vdso_data = &vdso_data_store.data; -/* Format of the patch table */ -struct vdso_patch_def +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, + unsigned long text_size) { - unsigned long ftr_mask, ftr_value; - const char *gen_name; - const char *fix_name; -}; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; -/* Table of functions to patch based on the CPU type/revision - * - * Currently, we only change sync_dicache to do nothing on processors - * with a coherent icache - */ -static struct vdso_patch_def vdso_patches[] = { - { - CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, -}; + if (new_size != text_size + PAGE_SIZE) + return -EINVAL; -/* - * Some infos carried around for each of them during parsing at - * boot time. - */ -struct lib32_elfinfo + current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + + return 0; +} + +static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf32_Ehdr *hdr; /* ptr to ELF */ - Elf32_Sym *dynsym; /* ptr to .dynsym section */ - unsigned long dynsymsize; /* size of .dynsym section */ - char *dynstr; /* ptr to .dynstr section */ - unsigned long text; /* offset of .text section in .so */ -}; + return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start); +} -struct lib64_elfinfo +static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - char *dynstr; - unsigned long text; + return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); +} + +static struct vm_special_mapping vdso32_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso32_mremap, }; +static struct vm_special_mapping vdso64_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso64_mremap, +}; /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; - unsigned long vdso_pages; + struct vm_special_mapping *vdso_spec; + struct vm_area_struct *vma; + unsigned long vdso_size; unsigned long vdso_base; - int rc; - - if (!vdso_ready) - return 0; -#ifdef CONFIG_PPC64 if (is_32bit_task()) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; + vdso_spec = &vdso32_spec; + vdso_size = &vdso32_end - &vdso32_start; vdso_base = VDSO32_MBASE; } else { - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; + vdso_spec = &vdso64_spec; + vdso_size = &vdso64_end - &vdso64_start; /* * On 64bit we don't have a preferred map address. 
This * allows get_unmapped_area to find an area near other mmaps @@ -148,21 +109,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) */ vdso_base = 0; } -#else - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - vdso_base = VDSO32_MBASE; -#endif - current->mm->context.vdso_base = 0; - - /* vDSO has a problem and was disabled, just don't "enable" it for the - * process - */ - if (vdso_pages == 0) - return 0; /* Add a page to the vdso size for the data page */ - vdso_pages ++; + vdso_size += PAGE_SIZE; /* * pick a base address for the vDSO in process space. We try to put it @@ -170,16 +119,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - if (mmap_write_lock_killable(mm)) - return -EINTR; vdso_base = get_unmapped_area(NULL, vdso_base, - (vdso_pages << PAGE_SHIFT) + - ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto fail_mmapsem; - } + if (IS_ERR_VALUE(vdso_base)) + return vdso_base; /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); @@ -187,9 +131,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). + * will fail to recognise it as a vDSO. */ - current->mm->context.vdso_base = vdso_base; + mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -201,434 +145,54 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. 
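Collected in one place, the mapping path this hunk switches to amounts to roughly the sketch below. It is illustrative only: the demo_* names are invented, the pages array must be populated with the backing pages before use, while the flag set, the .mremap hook and the PTR_ERR_OR_ZERO() return mirror the new code in this file:

static struct page *demo_pages[2];	/* fill with the vDSO backing pages first */

static int demo_mremap(const struct vm_special_mapping *sm,
		       struct vm_area_struct *new_vma)
{
	/* keep the mm's idea of the vDSO location in sync after mremap() */
	current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE;
	return 0;
}

static struct vm_special_mapping demo_spec __ro_after_init = {
	.name   = "[vdso]",
	.pages  = demo_pages,
	.mremap = demo_mremap,
};

static int demo_map_vdso(struct mm_struct *mm, unsigned long vdso_base,
			 unsigned long vdso_size)
{
	struct vm_area_struct *vma;

	vma = _install_special_mapping(mm, vdso_base, vdso_size,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC, &demo_spec);
	return PTR_ERR_OR_ZERO(vma);
}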
*/ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) { - current->mm->context.vdso_base = 0; - goto fail_mmapsem; - } - - mmap_write_unlock(mm); - return 0; - - fail_mmapsem: - mmap_write_unlock(mm); - return rc; -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - - - -#ifdef CONFIG_VDSO32 -static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf32_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - *size = 0; - return NULL; -} - -static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - Elf32_Sym *sym = find_symbol32(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO32: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO32_LBASE; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf32_Sym *sym32_gen, *sym32_fix; - - sym32_gen = find_symbol32(v32, orig); - if (sym32_gen == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym32_gen->st_name = 0; - return 0; - } - sym32_fix = find_symbol32(v32, fix); - if (sym32_fix == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); - return -1; - } - sym32_gen->st_value = sym32_fix->st_value; - sym32_gen->st_size = sym32_fix->st_size; - sym32_gen->st_info = sym32_fix->st_info; - sym32_gen->st_other = sym32_fix->st_other; - sym32_gen->st_shndx = sym32_fix->st_shndx; - - return 0; -} -#else /* !CONFIG_VDSO32 */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - return 0; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - return 0; -} -#endif /* CONFIG_VDSO32 */ - - -#ifdef CONFIG_PPC64 - -static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf64_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) 
{ - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - if (size) - *size = 0; - return NULL; -} - -static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function64(struct lib64_elfinfo *lib, - const char *symname) -{ - Elf64_Sym *sym = find_symbol64(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO64: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO64_LBASE; -} - -static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf64_Sym *sym64_gen, *sym64_fix; - - sym64_gen = find_symbol64(v64, orig); - if (sym64_gen == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym64_gen->st_name = 0; - return 0; - } - sym64_fix = find_symbol64(v64, fix); - if (sym64_fix == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); - return -1; - } - sym64_gen->st_value = sym64_fix->st_value; - sym64_gen->st_size = sym64_fix->st_size; - sym64_gen->st_info = sym64_fix->st_info; - sym64_gen->st_other = sym64_fix->st_other; - sym64_gen->st_shndx = sym64_fix->st_shndx; - - return 0; + vma = _install_special_mapping(mm, vdso_base, vdso_size, + VM_READ | VM_EXEC | VM_MAYREAD | + VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + return PTR_ERR_OR_ZERO(vma); } -#endif /* CONFIG_PPC64 */ - - -static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - void *sect; - - /* - * Locate symbol tables & text section - */ - -#ifdef CONFIG_VDSO32 - v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); - v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); - if (v32->dynsym == NULL || v32->dynstr == NULL) { - printk(KERN_ERR "vDSO32: required symbol section not found\n"); - return -1; - } - sect = find_section32(v32->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO32: the .text section was not found\n"); - return -1; - } - v32->text = sect - vdso32_kbase; -#endif - -#ifdef CONFIG_PPC64 - v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); - v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); - if (v64->dynsym == NULL || v64->dynstr == NULL) { - printk(KERN_ERR "vDSO64: required symbol section not found\n"); - return -1; - } - sect = find_section64(v64->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO64: the .text section was not found\n"); - return -1; - } - v64->text = sect - vdso64_kbase; -#endif /* CONFIG_PPC64 */ - - return 0; -} - -static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - /* - * Find signal trampolines - */ - -#ifdef CONFIG_PPC64 - vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); -#endif - vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); - vdso32_rt_sigtramp = find_function32(v32, 
"__kernel_sigtramp_rt32"); -} + struct mm_struct *mm = current->mm; + int rc; -static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ -#ifdef CONFIG_VDSO32 - Elf32_Sym *sym32; -#endif -#ifdef CONFIG_PPC64 - Elf64_Sym *sym64; + mm->context.vdso = NULL; - sym64 = find_symbol64(v64, "__kernel_datapage_offset"); - if (sym64 == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - (vdso64_pages << PAGE_SHIFT) - - (sym64->st_value - VDSO64_LBASE); -#endif /* CONFIG_PPC64 */ + if (mmap_write_lock_killable(mm)) + return -EINTR; -#ifdef CONFIG_VDSO32 - sym32 = find_symbol32(v32, "__kernel_datapage_offset"); - if (sym32 == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - (vdso32_pages << PAGE_SHIFT) - - (sym32->st_value - VDSO32_LBASE); -#endif + rc = __arch_setup_additional_pages(bprm, uses_interp); + if (rc) + mm->context.vdso = NULL; - return 0; + mmap_write_unlock(mm); + return rc; } +#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ + void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \ + void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \ + \ + do_##type##_fixups((value), __start, __end); \ +} while (0) -static __init int vdso_fixup_features(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +static void __init vdso_fixup_features(void) { - unsigned long size; - void *start; - #ifdef CONFIG_PPC64 - start = find_section64(v64->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - - start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); - - start = find_section64(v64->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_VDSO32 - start = find_section32(v32->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup); #ifdef CONFIG_PPC64 - start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup); #endif /* CONFIG_PPC64 */ - - start = find_section32(v32->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup); #endif - - return 0; -} - -static __init int 
vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { - struct vdso_patch_def *patch = &vdso_patches[i]; - int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) - == patch->ftr_value; - if (!match) - continue; - - DBG("replacing %s with %s...\n", patch->gen_name, - patch->fix_name ? "NONE" : patch->fix_name); - - /* - * Patch the 32 bits and 64 bits symbols. Note that we do not - * patch the "." symbol on 64 bits. - * It would be easy to do, but doesn't seem to be necessary, - * patching the OPD symbol is enough. - */ - vdso_do_func_patch32(v32, v64, patch->gen_name, - patch->fix_name); -#ifdef CONFIG_PPC64 - vdso_do_func_patch64(v32, v64, patch->gen_name, - patch->fix_name); -#endif /* CONFIG_PPC64 */ - } - - return 0; -} - - -static __init int vdso_setup(void) -{ - struct lib32_elfinfo v32; - struct lib64_elfinfo v64; - - v32.hdr = vdso32_kbase; -#ifdef CONFIG_PPC64 - v64.hdr = vdso64_kbase; -#endif - if (vdso_do_find_sections(&v32, &v64)) - return -1; - - if (vdso_fixup_datapage(&v32, &v64)) - return -1; - - if (vdso_fixup_features(&v32, &v64)) - return -1; - - if (vdso_fixup_alt_funcs(&v32, &v64)) - return -1; - - vdso_setup_trampolines(&v32, &v64); - - return 0; } /* @@ -638,27 +202,13 @@ static __init int vdso_setup(void) static void __init vdso_setup_syscall_map(void) { unsigned int i; - extern unsigned long *sys_call_table; -#ifdef CONFIG_PPC64 - extern unsigned long *compat_sys_call_table; -#endif - extern unsigned long sys_ni_syscall; - for (i = 0; i < NR_syscalls; i++) { -#ifdef CONFIG_PPC64 - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && - compat_sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#else /* CONFIG_PPC64 */ - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#endif /* CONFIG_PPC64 */ + compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -689,10 +239,26 @@ int vdso_getcpu_init(void) early_initcall(vdso_getcpu_init); #endif -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { int i; + struct page **pagelist; + int pages = (end - start) >> PAGE_SHIFT; + + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + + pagelist[0] = virt_to_page(vdso_data); + + for (i = 0; i < pages; i++) + pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + + return pagelist; +} +static int __init vdso_init(void) +{ #ifdef CONFIG_PPC64 /* * Fill up the "systemcfg" stuff for backward compatibility @@ -717,75 +283,19 @@ static int __init vdso_init(void) vdso_data->icache_block_size = ppc64_caches.l1i.block_size; vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; - - /* - * Calculate the size of the 64 bits vDSO - */ - vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; - DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); #endif /* CONFIG_PPC64 */ - -#ifdef CONFIG_VDSO32 - vdso32_kbase = &vdso32_start; - - /* - * Calculate the size 
of the 32 bits vDSO - */ - vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; - DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); -#endif - - - /* - * Setup the syscall map in the vDOS - */ vdso_setup_syscall_map(); - /* - * Initialize the vDSO images in memory, that is do necessary - * fixups of vDSO symbols, locate trampolines, etc... - */ - if (vdso_setup()) { - printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - vdso32_pages = 0; -#ifdef CONFIG_PPC64 - vdso64_pages = 0; -#endif - return 0; - } + vdso_fixup_features(); -#ifdef CONFIG_VDSO32 - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[i++] = virt_to_page(vdso_data); - vdso32_pagelist[i] = NULL; -#endif - -#ifdef CONFIG_PPC64 - vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - get_page(pg); - vdso64_pagelist[i] = pg; - } - vdso64_pagelist[i++] = virt_to_page(vdso_data); - vdso64_pagelist[i] = NULL; -#endif /* CONFIG_PPC64 */ + if (IS_ENABLED(CONFIG_VDSO32)) + vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); - get_page(virt_to_page(vdso_data)); + if (IS_ENABLED(CONFIG_PPC64)) + vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); smp_wmb(); - vdso_ready = 1; return 0; } diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 73eada6bc8cd..59aa2944ecae 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,8 +2,20 @@ # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules ifdef CROSS32_COMPILE @@ -15,14 +27,16 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 +KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both @@ -33,33 +47,30 @@ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) - -# strip rule for the .so 
file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso32ld_and_check) # assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) # actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 3440ddf21c8b..f340e82d1981 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -24,11 +24,15 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr #endif #ifdef CONFIG_PPC64 @@ -84,20 +88,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) - +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 1d23e2771dba..65244416ab94 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text - .global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. 
r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 beqlr @@ -51,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c7b54a5dcd3e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index e7f8f9f1b3f4..a6e29f880e0e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -12,13 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> - -/* Offset for the low 32-bit part of a field of long type */ -#ifdef CONFIG_PPC64 -#define LOPART 4 -#else -#define LOPART 0 -#endif +#include <asm/vdso/gettimeofday.h> .text /* @@ -28,32 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr. r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - get_datapage r9, r0 - beq 3f - LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */ - bl __do_get_tspec@local /* get sec/usec from tb & kernel */ - stw r3,TVAL32_TV_SEC(r10) - stw r4,TVAL32_TV_USEC(r10) - -3: cmplwi r11,0 /* check if tz is NULL */ - mtlr r12 - crclr cr0*4+so - li r3,0 - beqlr - - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) /* @@ -63,129 +32,18 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpli cr0,r3,CLOCK_REALTIME - cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpli cr5,r3,CLOCK_REALTIME_COARSE - cmpli cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0, .Lgettime_fallback - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r9, r0 - LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */ - beq cr5, .Lcoarse_clocks -.Lprecise_clocks: - bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ - bne cr1, .Lfinish /* not monotonic -> all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r3,r4 contain our sec/nsec values, r5 and r6 - * can be used, r7 contains NSEC_PER_SEC. - */ - - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* We now have our offset in r5,r6. 
We create a fake dependency - * on that value and re-check the counter - */ - or r0,r6,r5 - xor r0,r0,r0 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- .Lprecise_clocks - b .Lfinish_monotonic - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -.Lcoarse_clocks: - lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- .Lcoarse_clocks - add r9,r9,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - lwz r4,STAMP_XTIME_NSEC+LOPART(r9) - bne cr6,1f - - /* CLOCK_MONOTONIC_COARSE */ - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* check if counter has updated */ - or r0,r6,r5 -1: or r0,r0,r3 - or r0,r0,r4 - xor r0,r0,r0 - add r3,r3,r0 - lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9) - cmpl cr0,r0,r8 /* check if updated */ - bne- .Lcoarse_clocks - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6, .Lfinish - - /* Calculate and store result. Note that this mimics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? - */ -.Lfinish_monotonic: - add r3,r3,r5 - add r4,r4,r6 - cmpw cr0,r4,r7 - cmpwi cr1,r4,0 - blt 1f - subf r4,r7,r4 - addi r3,r3,1 -1: bge cr1, .Lfinish - addi r3,r3,-1 - add r4,r4,r7 - -.Lfinish: - stw r3,TSPC32_TV_SEC(r11) - stw r4,TSPC32_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -.Lgettime_fallback: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) +/* + * Exact prototype of clock_gettime64() + * + * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime64) + cvdso_call __c_kernel_clock_gettime64 +V_FUNCTION_END(__kernel_clock_gettime64) /* * Exact prototype of clock_getres() @@ -194,37 +52,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmplwi cr0, r3, CLOCK_MAX - cmpwi cr1, r3, CLOCK_REALTIME_COARSE - cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE - bgt cr0, 99f - LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES) - beq cr1, 1f - beq cr7, 1f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 -1: li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - stw r3,TSPC32_TV_SEC(r4) - stw r5,TSPC32_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) @@ -235,105 +63,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r9, r0 - - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - - cmplwi r11,0 /* check if t is NULL */ - mtlr r12 - crclr cr0*4+so - beqlr - stw r3,0(r11) /* store result at *t */ - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r3 (seconds) and r4. 
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. - * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5 and r6. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -__do_get_tspec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: MFTBU(r3) - MFTBL(r4) - MFTBU(r0) - cmplw cr0,r3,r0 - bne- 2b - - /* Subtract tb orig stamp and shift left 12 bits. - */ - subfc r4,r6,r4 - subfe r0,r5,r3 - slwi r0,r0,12 - rlwimi. r0,r4,12,20,31 - slwi r4,r4,12 - - /* - * Load scale factor & do multiplication. - * We only use the high 32 bits of the tb_to_xs value. - * Even with a 1GHz timebase clock, the high 32 bits of - * tb_to_xs will be at least 4 million, so the error from - * ignoring the low 32 bits will be no more than 0.25ppm. - * The error will just make the clock run very very slightly - * slow until the next time the kernel updates the VDSO data, - * at which point the clock will catch up to the kernel's value, - * so there is no long-term error accumulation. - */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - mulhwu r4,r4,r5 - li r3,0 - - beq+ 4f /* skip high part computation if 0 */ - mulhwu r3,r0,r5 - mullw r5,r0,r5 - addc r4,r4,r5 - addze r3,r3 -4: - /* At this point, we have seconds since the xtime stamp - * as a 32.32 fixed-point number in r3 and r4. - * Load & add the xtime stamp. - */ - lwz r5,STAMP_XTIME_SEC+LOPART(r9) - lwz r6,STAMP_SEC_FRAC(r9) - addc r4,r4,r6 - adde r3,r3,r5 - - /* We create a fake dependency on the result in r3/r4 - * and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmplw cr0,r8,r0 /* check if updated */ - bne- 1b - - mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ - - blr - .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 7eadac74c7f9..a4b806b0d618 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO32_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -36,17 +39,25 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; #ifdef CONFIG_PPC64 . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; #endif /* @@ -68,49 +79,15 @@ SECTIONS __end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. 
- */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.got1) } } @@ -138,19 +115,14 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; + __kernel_clock_gettime64; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) @@ -160,3 +132,9 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp32 = __kernel_sigtramp32; +VDSO_sigtramp_rt32 = __kernel_sigtramp_rt32; diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..65fb03fb1731 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. 
+ */ +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime32_data(vd, clock, ts); +} + +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dfd34f68bfa1..d365810a689a 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,16 +1,29 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both @@ -20,28 +33,23 @@ obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + # Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso64ld_and_check) -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -# actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso64.so: $(obj)/vdso64.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call 
cmd,vdso_install) - -vdso_install: vdso64.so +# actual build commands +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index cab14324242b..76c3c8cf8ece 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -23,10 +23,14 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 @@ -61,19 +65,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 067247d3efb9..00760dc69d68 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text -.global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -53,7 +50,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..4bf15ffd5933 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . 
VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 20f8be40c653..d7a7bfb51081 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -12,6 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> +#include <asm/vdso/gettimeofday.h> .text /* @@ -21,31 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - get_datapage r3, r0 - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l - bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ - std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ - std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) @@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpwi cr5,r3,CLOCK_REALTIME_COARSE - cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r3, r0 - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l - beq cr5,70f -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ - bne cr1,80f /* if not monotonic, all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_tspec. - * At this point, r4,r5 contain our sec/nsec values. - */ - - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r6,r9. We create a fake dependency - * on that result and re-check the counter - */ - or r0,r6,r9 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - b 78f - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -70: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 70b - add r3,r3,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - ld r4,STAMP_XTIME_SEC(r3) - ld r5,STAMP_XTIME_NSEC(r3) - bne cr6,75f - - /* CLOCK_MONOTONIC_COARSE */ - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* check if counter has updated */ - or r0,r6,r9 -75: or r0,r0,r4 - or r0,r0,r5 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 70b - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. 
- */ - bne cr6,80f - - /* Add wall->monotonic offset and check for overflow or underflow */ -78: add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 79f - subf r5,r7,r5 - addi r4,r4,1 -79: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 - -80: std r4,TSPC64_TV_SEC(r11) - std r5,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 - li r3,0 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) /* @@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r3, r0 - - ld r4,STAMP_XTIME_SEC(r3) - - cmpldi r11,0 /* check if t is NULL */ - beq 2f - std r4,0(r11) /* store result at *t */ -2: mtlr r12 - crclr cr0*4+so - mr r3,r4 - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r4 (seconds) and r5. - * On entry, r7 gives the resolution of r5, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. - * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0, r6 and r9. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -V_FUNCTION_BEGIN(__do_get_tspec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. 
- */ - MFTB(r6) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r6,r9,r6 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - sldi r6,r6,12 /* compute time since stamp_xtime */ - mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ - - /* Add stamp since epoch */ - ld r4,STAMP_XTIME_SEC(r3) - lwz r5,STAMP_SEC_FRAC(r3) - or r0,r4,r5 - or r0,r0,r6 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld r0,r8 /* check if updated */ - bne- 1b /* reload if so */ - - /* convert to seconds & nanoseconds and add to stamp */ - add r6,r6,r5 /* add on fractional seconds of xtime */ - mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ - srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r5,r5,32 - add r4,r4,r6 - blr - .cfi_endproc -V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..6164d1a1ba11 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO64_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -37,16 +40,24 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; /* * Other stuff is appended to the text segment: @@ -61,56 +72,21 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. 
- */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.opd) } } @@ -138,18 +114,12 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; @@ -157,3 +127,8 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp_rt64 = __kernel_sigtramp_rt64; diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c new file mode 100644 index 000000000000..5b5500058344 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vgettimeofday.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include <linux/time.h> +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e184d17387f6..0318ba436f34 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -200,21 +200,7 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . 
= ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -242,9 +228,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 0effd48c8f4d..b08cc15f31c7 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -840,6 +840,9 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR1: case SPRN_MMCR2: case SPRN_UMMCR2: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif break; unprivileged: @@ -1004,6 +1007,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR2: case SPRN_UMMCR2: case SPRN_TIR: + case SPRN_UAMOR: + case SPRN_IAMR: + case SPRN_AMR: #endif *spr_val = 0; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e3b1839fc251..6f612d240392 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1241,9 +1241,9 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) switch (get_xop(inst)) { case OP_31_XOP_MSGSNDP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; - arg &= 0x3f; + arg &= 0x7f; if (arg >= kvm->arch.emul_smt_mode) break; tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg); @@ -1256,7 +1256,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) break; case OP_31_XOP_MSGCLRP: arg = kvmppc_get_gpr(vcpu, rb); - if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER) break; vcpu->arch.vcore->dpdes = 0; vcpu->arch.doorbell_request = 0; @@ -1327,9 +1327,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_SYSTEM_RESET: r = RESUME_GUEST; break; - case BOOK3S_INTERRUPT_MACHINE_CHECK: - /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + case BOOK3S_INTERRUPT_MACHINE_CHECK: { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + /* + * Print the MCE event to host console. Ratelimit so the guest + * can't flood the host log. + */ + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt,false, true); /* * If the guest can do FWNMI, exit to userspace so it can @@ -1357,6 +1363,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_HOST; break; + } case BOOK3S_INTERRUPT_PROGRAM: { ulong flags; @@ -1516,11 +1523,16 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: + { + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Pass the machine check to the L1 guest */ r = RESUME_HOST; /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); + if (__ratelimit(&rs)) + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); break; + } /* * We get these next two if the guest accesses a page which it thinks * it has mapped but which is not actually present, either because @@ -4949,7 +4961,12 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * Work out how many sets the TLB has, for the use of * the TLB invalidation loop in book3s_hv_rmhandlers.S. 
*/ - if (radix_enabled()) + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * P10 will flush all the congruence class with a single tlbiel + */ + kvm->arch.tlb_sets = 1; + } else if (radix_enabled()) kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ else if (cpu_has_feature(CPU_FTR_ARCH_300)) kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8f58dd20b362..8053efdf7ea7 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -694,6 +694,7 @@ static void wait_for_sync(struct kvm_split_mode *sip, int phase) void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) { + int num_sets; unsigned long rb, set; /* wait for every other thread to get to real mode */ @@ -704,11 +705,19 @@ void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip) mtspr(SPRN_LPID, sip->lpidr_req); isync(); + /* + * P10 will flush all the congruence class with a single tlbiel + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + num_sets = 1; + else + num_sets = POWER9_TLB_SETS_RADIX; + /* Invalidate the TLB on thread 0 */ if (local_paca->kvm_hstate.tid == 0) { sip->do_set = 0; asm volatile("ptesync" : : : "memory"); - for (set = 0; set < POWER9_TLB_SETS_RADIX; ++set) { + for (set = 0; set < num_sets; ++set) { rb = TLBIEL_INVAL_SET_LPID + (set << TLBIEL_INVAL_SET_SHIFT); asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : : diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 6028628ea3ac..d4bca93b79f6 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -65,10 +65,9 @@ static void reload_slb(struct kvm_vcpu *vcpu) * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. */ -static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; - struct machine_check_event mce_evt; long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -106,6 +105,21 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } + return handled; +} + +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +{ + struct machine_check_event mce_evt; + long handled; + + if (vcpu->kvm->arch.fwnmi_enabled) { + /* FWNMI guests handle their own recovery */ + handled = 0; + } else { + handled = kvmppc_realmode_mc_power7(vcpu); + } + /* * Now get the event and stash it in the vcpu struct so it can * be handled by the primary thread in virtual mode. We can't @@ -122,11 +136,6 @@ static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } -void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) -{ - kvmppc_realmode_mc_power7(vcpu); -} - /* Check if dynamic split is in force and return subcore size accordingly. 
*/ static inline int kvmppc_cur_subcore_size(void) { diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b1fefa63e125..913944dc3620 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -239,7 +239,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) smsr |= (guest_msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - smsr |= MSR_ISF | MSR_HV; + smsr |= MSR_HV; #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 3dc129a254b5..b45b750fa77a 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,8 +36,8 @@ #define FUNC(name) name -#define RFI_TO_KERNEL RFI -#define RFI_TO_GUEST RFI +#define RFI_TO_KERNEL rfi +#define RFI_TO_GUEST rfi .macro INTERRUPT_TRAMPOLINE intno diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 5fee5a11550d..303e3cb096db 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -473,7 +473,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, arch_spin_unlock(&ics->lock); local_irq_restore(flags); new_irq = reject; - check_resend = 0; + check_resend = false; goto again; } } else { @@ -501,7 +501,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, state->resend = 0; arch_spin_unlock(&ics->lock); local_irq_restore(flags); - check_resend = 0; + check_resend = false; goto again; } } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a0ebc29f30b2..30dfeac731c6 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -219,7 +219,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, /* In single escalation mode, we grab the ESB MMIO of the * interrupt and mask it. Also populate the VCPU v/raddr * of the ESB page for use by asm entry/exit code. Finally - * set the XIVE_IRQ_NO_EOI flag which will prevent the + * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the * core code from performing an EOI on the escalation * interrupt, thus leaving it effectively masked after * it fires once. @@ -231,7 +231,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; - xd->flags |= XIVE_IRQ_NO_EOI; + xd->flags |= XIVE_IRQ_FLAG_NO_EOI; } return 0; @@ -419,37 +419,16 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); + /* Set PQ to 10, return old P and old Q and remember them */ + val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); + state->old_p = !!(val & 2); + state->old_q = !!(val & 1); + /* - * If the interrupt is marked as needing masking via - * firmware, we do it here. Firmware masking however - * is "lossy", it won't return the old p and q bits - * and won't set the interrupt to a state where it will - * record queued ones. If this is an issue we should do - * lazy masking instead. - * - * For now, we work around this in unmask by forcing - * an interrupt whenever we unmask a non-LSI via FW - * (if ever). 
+ * Synchronize hardware to sensure the queues are updated when + * masking */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - MASKED, state->number); - /* set old_p so we can track if an H_EOI was done */ - state->old_p = true; - state->old_q = false; - } else { - /* Set PQ to 10, return old P and old Q and remember them */ - val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); - state->old_p = !!(val & 2); - state->old_q = !!(val & 1); - - /* - * Synchronize hardware to sensure the queues are updated - * when masking - */ - xive_native_sync_source(hw_num); - } + xive_native_sync_source(hw_num); return old_prio; } @@ -483,23 +462,6 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, /* Get the right irq */ kvmppc_xive_select_irq(state, &hw_num, &xd); - /* - * See comment in xive_lock_and_mask() concerning masking - * via firmware. - */ - if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { - xive_native_configure_irq(hw_num, - kvmppc_xive_vp(xive, state->act_server), - state->act_priority, state->number); - /* If an EOI is needed, do it here */ - if (!state->old_p) - xive_vm_source_eoi(hw_num, xd); - /* If this is not an LSI, force a trigger */ - if (!(xd->flags & OPAL_XIVE_IRQ_LSI)) - xive_irq_trigger(xd); - goto bail; - } - /* Old Q set, set PQ to 11 */ if (state->old_q) xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11); @@ -2125,9 +2087,8 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) if (!q->qpage && !xc->esc_virq[i]) continue; - seq_printf(m, " [q%d]: ", i); - if (q->qpage) { + seq_printf(m, " q[%d]: ", i); idx = q->idx; i0 = be32_to_cpup(q->qpage + idx); idx = (idx + 1) & q->msk; @@ -2141,16 +2102,54 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) irq_data_get_irq_handler_data(d); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, " ESC %d %c%c EOI @%llx", + xc->esc_virq[i], + (pq & XIVE_ESB_VAL_P) ? 'P' : '-', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-', + xd->eoi_page); seq_puts(m, "\n"); } } return 0; } +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb) +{ + int i; + + seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n"); + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; + struct xive_irq_data *xd; + u64 pq; + u32 hw_num; + + if (!state->valid) + continue; + + kvmppc_xive_select_irq(state, &hw_num, &xd); + + pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + + seq_printf(m, "%08x %08x/%02x", state->number, hw_num, + xd->src_chip); + if (state->lsi) + seq_printf(m, " %cLSI", state->asserted ? '^' : ' '); + else + seq_puts(m, " MSI"); + + seq_printf(m, " %s %c%c %08x % 4d/%d", + state->ipi_number == hw_num ? "IPI" : " PT", + pq & XIVE_ESB_VAL_P ? 'P' : '-', + pq & XIVE_ESB_VAL_Q ? 
'Q' : '-', + state->eisn, state->act_server, + state->act_priority); + + seq_puts(m, "\n"); + } +} + static int xive_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; @@ -2171,7 +2170,7 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!kvm) return 0; - seq_printf(m, "=========\nVCPU state\n=========\n"); + seq_puts(m, "=========\nVCPU state\n=========\n"); kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -2179,11 +2178,12 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" - " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, - xc->mfrr, xc->pending, - xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + seq_printf(m, "VCPU %d: VP:%#x/%02x\n" + " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, + xc->cppr, xc->hw_cppr, + xc->mfrr, xc->pending, + xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); kvmppc_xive_debug_show_queues(m, vcpu); @@ -2199,13 +2199,25 @@ static int xive_debug_show(struct seq_file *m, void *private) t_vm_h_ipi += xc->stat_vm_h_ipi; } - seq_printf(m, "Hcalls totals\n"); + seq_puts(m, "Hcalls totals\n"); seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr); seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll); seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr); seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi); seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi); + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 382e3a56e789..86c24a4ad809 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -218,6 +218,17 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp return xive->src_blocks[bid]; } +/* + * When the XIVE resources are allocated at the HW level, the VP + * structures describing the vCPUs of a guest are distributed among + * the chips to optimize the PowerBUS usage. For best performance, the + * guest vCPUs can be pinned to match the VP structure distribution. + * + * Currently, the VP identifiers are deduced from the vCPU id using + * the kvmppc_pack_vcpu_id() routine which is not incorrect but not + * optimal either. It VSMT is used, the result is not continuous and + * the constraints on HW resources described above can not be met. 
+ */ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) { return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); @@ -290,6 +301,8 @@ extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); */ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); +void kvmppc_xive_debug_show_sources(struct seq_file *m, + struct kvmppc_xive_src_block *sb); struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( struct kvmppc_xive *xive, int irq); void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index a59a94f02733..76800c84f2a3 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -37,9 +37,6 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) * ordering. */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val = in_be64(xd->eoi_mmio + offset); return (u8)val; } @@ -1219,18 +1216,31 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, xc->vp_id, + seq_printf(m, "VCPU %d: VP=%#x/%02x\n" + " NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, xc->vp_chip_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, vcpu->arch.xive_saved_state.pipr, - vcpu->arch.xive_saved_state.w01, - (u32) vcpu->arch.xive_cam_word); + be64_to_cpu(vcpu->arch.xive_saved_state.w01), + be32_to_cpu(vcpu->arch.xive_cam_word)); kvmppc_xive_debug_show_queues(m, vcpu); } + seq_puts(m, "=========\nSources\n=========\n"); + + for (i = 0; i <= xive->max_sbid; i++) { + struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; + + if (sb) { + arch_spin_lock(&sb->lock); + kvmppc_xive_debug_show_sources(m, sb); + arch_spin_unlock(&sb->lock); + } + } + return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4ad3c0279458..b0015e05d99a 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -61,9 +61,6 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - val =__x_readq(__x_eoi_page(xd) + offset); #ifdef __LITTLE_ENDIAN__ val >>= 64-8; @@ -77,8 +74,6 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) - opal_int_eoi(hw_irq); else if (xd->flags & XIVE_IRQ_FLAG_LSI) { /* * For LSIs the HW EOI cycle is used rather than PQ bits, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b1abcb816439..288a9820ec01 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -500,11 +500,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.regs.nip = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; - if (update_esr == true) + if (update_esr) kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); - if (update_dear == true) + if (update_dear) 
kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) { + if (update_epr) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 13999123b735..cf52d26f49cd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1087,7 +1087,7 @@ static inline u64 sp_to_dp(u32 fprs) preempt_disable(); enable_kernel_fp(); - asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs) + asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m"UPD_CONSTR (fprd) : "m"UPD_CONSTR (fprs) : "fr0"); preempt_enable(); return fprd; @@ -1099,7 +1099,7 @@ static inline u32 dp_to_sp(u64 fprd) preempt_disable(); enable_kernel_fp(); - asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd) + asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m"UPD_CONSTR (fprs) : "m"UPD_CONSTR (fprd) : "fr0"); preempt_enable(); return fprs; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 321c12a9ef6b..47821055b94c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -124,7 +124,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_entry_barrier_fixup), + start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -176,7 +176,7 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) long *start, *end; int i; - start = PTRRELOC(&__start___stf_exit_barrier_fixup), + start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); instrs[0] = 0x60000000; /* nop */ @@ -344,7 +344,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) long *start, *end; int i; - start = PTRRELOC(&__start___rfi_flush_fixup), + start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); instrs[0] = 0x60000000; /* nop */ @@ -417,7 +417,7 @@ void do_barrier_nospec_fixups(bool enable) { void *start, *end; - start = PTRRELOC(&__start___barrier_nospec_fixup), + start = PTRRELOC(&__start___barrier_nospec_fixup); end = PTRRELOC(&__stop___barrier_nospec_fixup); do_barrier_nospec_fixups_range(enable, start, end); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 855457ed09b5..bf7a7d62ae8b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[]; #define XER_OV32 0x00080000U #define XER_CA32 0x00040000U +#ifdef CONFIG_VSX +#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe)) +#endif + #ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S @@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) up[1] = tmp; break; } + case 32: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[3]); + up[3] = tmp; + tmp = byterev_8(up[2]); + up[2] = byterev_8(up[1]); + up[1] = tmp; + break; + } + #endif default: WARN_ON_ONCE(1); @@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, reg->d[0] = reg->d[1] = 0; switch (op->element_size) { + case 32: + /* [p]lxvp[x] */ case 16: /* whole vector; lxv[x] or lxvl[l] */ if (size == 0) @@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) 
rev = !rev; if (rev) - do_byte_reverse(reg, 16); + do_byte_reverse(reg, size); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ @@ -793,6 +812,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, size = GETSIZE(op->type); switch (op->element_size) { + case 32: + /* [p]stxvp[x] */ + if (size == 0) + break; + if (rev) { + /* reverse 32 bytes */ + buf.d[0] = byterev_8(reg->d[3]); + buf.d[1] = byterev_8(reg->d[2]); + buf.d[2] = byterev_8(reg->d[1]); + buf.d[3] = byterev_8(reg->d[0]); + reg = &buf; + } + memcpy(mem, reg, size); + break; case 16: /* stxv, stxvx, stxvl, stxvll */ if (size == 0) @@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem, cross_endian); + nr_vsx_regs = size / sizeof(__vector128); + emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - load_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } } else { - current->thread.fp_state.fpr[reg][0] = buf.d[0]; - current->thread.fp_state.fpr[reg][1] = buf.d[1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0]; + current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1]; + } } } else { - if (regs->msr & MSR_VEC) - load_vsrn(reg, &buf); - else - current->thread.vr_state.vr[reg - 32] = buf.v; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + load_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + current->thread.vr_state.vr[reg - 32 + i] = buf[j].v; + } + } } preempt_enable(); return 0; @@ -893,30 +941,45 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, bool cross_endian) { int reg = op->reg; - u8 mem[16]; - union vsx_reg buf; + int i, j, nr_vsx_regs; + u8 mem[32]; + union vsx_reg buf[2]; int size = GETSIZE(op->type); if (!address_ok(regs, ea, size)) return -EFAULT; + nr_vsx_regs = size / sizeof(__vector128); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ if (regs->msr & MSR_FP) { - store_vsrn(reg, &buf); + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } } else { - buf.d[0] = current->thread.fp_state.fpr[reg][0]; - buf.d[1] = current->thread.fp_state.fpr[reg][1]; + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0]; + buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1]; + } } } else { - if (regs->msr & MSR_VEC) - store_vsrn(reg, &buf); - else - buf.v = current->thread.vr_state.vr[reg - 32]; + if (regs->msr & MSR_VEC) { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? nr_vsx_regs - i - 1 : i; + store_vsrn(reg + i, &buf[j].v); + } + } else { + for (i = 0; i < nr_vsx_regs; i++) { + j = IS_LE ? 
nr_vsx_regs - i - 1 : i; + buf[j].v = current->thread.vr_state.vr[reg - 32 + i]; + } + } } preempt_enable(); - emulate_vsx_store(op, &buf, mem, cross_endian); + emulate_vsx_store(op, buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ @@ -1346,6 +1409,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; @@ -2400,6 +2466,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->vsx_flags = VSX_SPLAT; break; + case 333: /* lxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, 0, 32); + op->element_size = 32; + break; + case 364: /* lxvwsx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2428,6 +2502,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, VSX_CHECK_VEC; break; } + case 461: /* stxvpx */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, 0, 32); + op->element_size = 32; + break; case 524: /* lxsspx */ op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); @@ -2669,6 +2750,22 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif #ifdef CONFIG_VSX + case 6: + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + op->ea = dqform_ea(word, regs); + op->reg = VSX_REGISTER_XTP(rd); + op->element_size = 32; + switch (word & 0xf) { + case 0: /* lxvp */ + op->type = MKOP(LOAD_VSX, 0, 32); + break; + case 1: /* stxvp */ + op->type = MKOP(STORE_VSX, 0, 32); + break; + } + break; + case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ @@ -2733,6 +2830,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -1; + prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); op->update_reg = ra; @@ -2751,6 +2851,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 41: /* plwa */ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); break; +#ifdef CONFIG_VSX case 42: /* plxsd */ op->reg = rd + 32; op->type = MKOP(LOAD_VSX, PREFIXED, 8); @@ -2791,18 +2892,33 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; break; +#endif /* CONFIG_VSX */ case 56: /* plq */ op->type = MKOP(LOAD, PREFIXED, 16); break; case 57: /* pld */ op->type = MKOP(LOAD, PREFIXED, 8); break; - case 60: /* stq */ +#ifdef CONFIG_VSX + case 58: /* plxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(LOAD_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ + case 60: /* pstq */ op->type = MKOP(STORE, PREFIXED, 16); break; case 61: /* pstd */ op->type = MKOP(STORE, PREFIXED, 8); break; +#ifdef CONFIG_VSX + case 62: /* pstxvp */ + op->reg = VSX_REGISTER_XTP(rd); + op->type = MKOP(STORE_VSX, PREFIXED, 32); + op->element_size = 32; + break; +#endif /* CONFIG_VSX */ } break; case 1: /* Type 01 Eight-Byte Register-to-Register */ diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 0a201b771477..783d1b85ecfe 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ 
b/arch/powerpc/lib/test_emulate_step.c @@ -612,6 +612,273 @@ static void __init test_lxvd2x_stxvd2x(void) } #endif /* CONFIG_VSX */ +#ifdef CONFIG_VSX +static void __init test_lxvp_stxvp(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[4] = (unsigned long)&c[0].a; + + /* + * lxvp XTp,DQ(RA) + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVP(34, 4, 0))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvp", "FAIL"); + } + + /*** stxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvp XSp,DQ(RA) + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=4 DQ=0 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVP(34, 4, 0))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvp", "FAIL"); + } +} +#else +static void __init test_lxvp_stxvp(void) +{ + show_result("lxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_lxvpx_stxvpx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + /*** lxvpx ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + regs.gpr[3] = (unsigned long)&c[0].a; + regs.gpr[4] = 0; + + /* + * lxvpx XTp,RA,RB + * XTp = 32xTX + 2xTp + * let TX=1 Tp=1 RA=3 RB=4 + */ + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVPX(34, 3, 4))); + + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("lxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("lxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("lxvpx", "FAIL"); + } + + /*** stxvpx ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * stxvpx XSp,RA,RB + * XSp = 32xSX + 2xSp + * let SX=1 Sp=1 RA=3 RB=4 + */ + 
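/*
 * [Editorial worked example, not part of the original patch: in the
 *  instruction image the 5-bit register field carries Sp in its upper bits
 *  and SX in bit 0, so SX=1/Sp=1 is encoded as 0b00011 = 3. The emulator
 *  maps that back with VSX_REGISTER_XTP(3) = ((3 & 1) << 5) | (3 & 0xfe)
 *  = 32 | 2 = 34, i.e. the VSR pair 34/35, which matches the
 *  "XSp = 32xSX + 2xSp" value passed to PPC_RAW_STXVPX(34, 3, 4) below.]
 */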
stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVPX(34, 3, 4))); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("stxvpx", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("stxvpx", "PASS (!CPU_FTR_VSX)"); + else + show_result("stxvpx", "FAIL"); + } +} +#else +static void __init test_lxvpx_stxvpx(void) +{ + show_result("lxvpx", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvpx", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_VSX +static void __init test_plxvp_pstxvp(void) +{ + struct ppc_inst instr; + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c[2]; + u32 cached_b[8]; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)"); + show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + /*** plxvp ***/ + + cached_b[0] = c[0].b[0] = 18233; + cached_b[1] = c[0].b[1] = 34863571; + cached_b[2] = c[0].b[2] = 834; + cached_b[3] = c[0].b[3] = 6138911; + cached_b[4] = c[1].b[0] = 1234; + cached_b[5] = c[1].b[1] = 5678; + cached_b[6] = c[1].b[2] = 91011; + cached_b[7] = c[1].b[3] = 121314; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&c[0].a; + + /* + * plxvp XTp,D(RA),R + * XTp = 32xTX + 2xTp + * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(®s, instr); + if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { + show_result("plxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("plxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("plxvp", "FAIL"); + } + + /*** pstxvp ***/ + + c[0].b[0] = 21379463; + c[0].b[1] = 87; + c[0].b[2] = 374234; + c[0].b[3] = 4; + c[1].b[0] = 90; + c[1].b[1] = 122; + c[1].b[2] = 555; + c[1].b[3] = 32144; + + /* + * pstxvp XSp,D(RA),R + * XSp = 32xSX + 2xSp + * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 + */ + instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, + PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + + stepped = emulate_step(®s, instr); + + if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] && + cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] && + cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] && + cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] && + cpu_has_feature(CPU_FTR_VSX)) { + show_result("pstxvp", "PASS"); + } else { + if (!cpu_has_feature(CPU_FTR_VSX)) + show_result("pstxvp", "PASS (!CPU_FTR_VSX)"); + else + show_result("pstxvp", "FAIL"); + } +} +#else +static void __init test_plxvp_pstxvp(void) +{ + show_result("plxvp", "SKIP (CONFIG_VSX is not set)"); + show_result("pstxvp", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + static void __init run_tests_load_store(void) { test_ld(); @@ -628,6 +895,9 @@ static void __init run_tests_load_store(void) test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); + test_lxvp_stxvp(); + test_lxvpx_stxvpx(); + test_plxvp_pstxvp(); } struct compute_test { diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 1732eaa740a9..3f972db17761 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,4 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += 
-DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o +obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index b2c912e517b9..0e6dc830c38b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -26,12 +26,11 @@ #include <asm/feature-fixups.h> #include <asm/code-patching-asm.h> -#ifdef CONFIG_SMP - .section .bss - .align 2 -mmu_hash_lock: - .space 4 -#endif /* CONFIG_SMP */ +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ +#else +#define PTE_FLAGS_OFFSET 0 +#endif /* * Load a PTE into the hash table, if possible. @@ -65,13 +64,14 @@ _GLOBAL(hash_page) /* Get PTE (linux-style) and check access */ lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ + lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: +112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ @@ -94,25 +94,33 @@ _GLOBAL(hash_page) rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ #else rlwimi r8,r4,23,20,28 /* compute pte address */ + /* + * If PTE_64BIT is set, the low word is the flags word; use that + * word for locking since it contains all the interesting bits. + */ + addi r8,r8,PTE_FLAGS_OFFSET #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* * Update the linux PTE atomically. We do the lwarx up-front * because almost always, there won't be a permission violation * and there won't already be an HPTE, and thus we will have * to update the PTE to set _PAGE_HASHPTE. -- paulus. - * - * If PTE_64BIT is set, the low word is the flags word; use that - * word for locking since it contains all the interesting bits. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r8,r8,PTE_FLAGS_OFFSET -#endif .Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ +#ifdef CONFIG_PPC_KUAP + mfsrin r5,r4 + rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ + rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + andc r5,r5,r0 /* Ks & ~MSR[PR] */ + andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc. r5,r3,r5 /* check access & ~permission */ +#else andc. r5,r3,r6 /* check access & ~permission */ +#endif + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ #else @@ -179,12 +187,6 @@ _GLOBAL(add_hash_page) mflr r0 stw r0,4(r1) - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - #ifdef CONFIG_SMP lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */ oris r8,r8,12 @@ -248,6 +250,12 @@ _GLOBAL(add_hash_page) stwcx. 
r5,0,r8 bne- 1b + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + bl create_hpte 9: @@ -350,11 +358,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - PAGE_OFFSET)@ha - lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 addi r4,r3,-HPTE_SIZE @@ -384,12 +387,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ .Lfound_empty - /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - PAGE_OFFSET)@ha - lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - addi r6,r6,1 - stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) - patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ ori r5,r5,PTE_H /* set H (secondary hash) bit */ @@ -411,30 +408,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * and we know there is a definite (although small) speed * advantage to putting the PTE in the primary PTEG, we always * put the PTE in the primary PTEG. - * - * In addition, we skip any slot that is mapping kernel text in - * order to avoid a deadlock when not using BAT mappings if - * trying to hash in the kernel hash code itself after it has - * already taken the hash table lock. This works in conjunction - * with pre-faulting of the kernel text. - * - * If the hash table bucket is full of kernel text entries, we'll - * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ + lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ lwz r6, (next_slot - PAGE_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) add r4,r3,r6 - LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */ - clrrwi r0,r0,12 - lis r6,etext@h - ori r6,r6,etext@l /* get etext */ - tophys(r6,r6) - cmpl cr0,r0,r6 /* compare and try again */ - blt 1b #ifndef CONFIG_SMP /* Store PTE in PTEG */ @@ -482,10 +463,6 @@ _ASM_NOKPROBE_SYMBOL(create_hpte) .align 2 next_slot: .space 4 -primary_pteg_full: - .space 4 -htab_hash_searches: - .space 4 .previous /* @@ -517,8 +494,9 @@ _GLOBAL(flush_hash_pages) rlwimi r5,r4,22,20,29 #else rlwimi r5,r4,23,20,28 + addi r5,r5,PTE_FLAGS_OFFSET #endif -1: lwz r0,PTE_FLAGS_OFFSET(r5) +1: lwz r0,0(r5) cmpwi cr1,r6,1 andi. r0,r0,_PAGE_HASHPTE bne 2f @@ -562,9 +540,6 @@ _GLOBAL(flush_hash_pages) * already clear, we're done (for this pte). If not, * clear it (atomically) and proceed. -- paulus. */ -#if (PTE_FLAGS_OFFSET != 0) - addi r5,r5,PTE_FLAGS_OFFSET -#endif 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. 
r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ @@ -633,77 +608,3 @@ _GLOBAL(flush_hash_pages) .previous EXPORT_SYMBOL(flush_hash_pages) _ASM_NOKPROBE_SYMBOL(flush_hash_pages) - -/* - * Flush an entry from the TLB - */ -_GLOBAL(_tlbie) -#ifdef CONFIG_SMP - lwz r8,TASK_CPU(r2) - oris r8,r8,11 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b - eieio - tlbie r3 - sync - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#else /* CONFIG_SMP */ - tlbie r3 - sync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbie) - -/* - * Flush the entire TLB. 603/603e only - */ -_GLOBAL(_tlbia) -#if defined(CONFIG_SMP) - lwz r8,TASK_CPU(r2) - oris r8,r8,10 - mfmsr r10 - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear DR */ - mtmsr r0 - isync - lis r9,mmu_hash_lock@h - ori r9,r9,mmu_hash_lock@l - tophys(r9,r9) -10: lwarx r7,0,r9 - cmpwi 0,r7,0 - bne- 10b - stwcx. r8,0,r9 - bne- 10b -#endif /* CONFIG_SMP */ - li r5, 32 - lis r4, KERNELBASE@h - mtctr r5 - sync -0: tlbie r4 - addi r4, r4, 0x1000 - bdnz 0b - sync -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ - mtmsr r10 - isync -#endif /* CONFIG_SMP */ - blr -_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a59e7ec98180..859e5bd603ac 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -33,19 +33,23 @@ u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0}; -struct hash_pte *Hash; -static unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; -static unsigned int hash_mb, hash_mb2; +static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash; +static unsigned long __initdata Hash_size, Hash_mask; +static unsigned int __initdata hash_mb, hash_mb2; +unsigned long __initdata _SDR1; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ -struct batrange { /* stores address ranges mapped by BATs */ +static struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; phys_addr_t phys; } bat_addrs[8]; +#ifdef CONFIG_SMP +unsigned long mmu_hash_lock; +#endif + /* * Return PA for this VA if it is mapped by a BAT, or 0 */ @@ -157,11 +161,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (__map_without_bats) { - pr_debug("RAM mapped without BATs\n"); - return base; - } - if (debug_pagealloc_enabled()) { + + if (debug_pagealloc_enabled() || __map_without_bats) { + pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; if (top >= border) @@ -304,11 +306,11 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* * Preload a translation in the hash table */ -void hash_preload(struct mm_struct *mm, unsigned long ea) +static void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; - if (!Hash) + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) return; pmd = pmd_off(mm, ea); if (!pmd_none(*pmd)) @@ -469,3 +471,7 @@ void __init setup_kuap(bool disabled) pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); } #endif + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/book3s32/nohash_low.S 
b/arch/powerpc/mm/book3s32/nohash_low.S new file mode 100644 index 000000000000..19f418b0ed2d --- /dev/null +++ b/arch/powerpc/mm/book3s32/nohash_low.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains low-level assembler routines for managing + * the PowerPC 603 tlb invalidation. + */ + +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * Flush an entry from the TLB + */ +#ifdef CONFIG_SMP +_GLOBAL(_tlbie) + lwz r8,TASK_CPU(r2) + oris r8,r8,11 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b + eieio + tlbie r3 + sync + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync + blr +_ASM_NOKPROBE_SYMBOL(_tlbie) +#endif /* CONFIG_SMP */ + +/* + * Flush the entire TLB. 603/603e only + */ +_GLOBAL(_tlbia) +#if defined(CONFIG_SMP) + lwz r8,TASK_CPU(r2) + oris r8,r8,10 + mfmsr r10 + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear DR */ + mtmsr r0 + isync + lis r9,mmu_hash_lock@h + ori r9,r9,mmu_hash_lock@l + tophys(r9,r9) +10: lwarx r7,0,r9 + cmpwi 0,r7,0 + bne- 10b + stwcx. r8,0,r9 + bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 + sync +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b + sync +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ + mtmsr r10 + isync +#endif /* CONFIG_SMP */ + blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index b6c7427daa6f..19f0ef950d77 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -30,35 +30,6 @@ #include <mm/mmu_decl.h> /* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context.id, addr, ptephys, 1); - } -} -EXPORT_SYMBOL(flush_hash_entry); - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (!Hash) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* * TLB flushing: * * - flush_tlb_mm(mm) flushes the specified mm context TLB's @@ -71,8 +42,12 @@ void tlb_flush(struct mmu_gather *tlb) * -- Cort */ -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. 
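 *
 * [Editorial sketch, not part of the original patch: in rough pseudo-C the
 *  body below walks the range one pmd at a time and hands each populated
 *  pmd to flush_hash_pages(), roughly
 *      for (addr = start; addr <= end; addr = pmd_end + 1)
 *              if (!pmd_none(*pmd))
 *                      flush_hash_pages(ctx, addr, pmd_val(*pmd), count);
 *  where count is the number of pages of the range that fall inside that
 *  pmd.]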
+ */ +void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { pmd_t *pmd; unsigned long pmd_end; @@ -80,13 +55,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start, unsigned int ctx = mm->context.id; start &= PAGE_MASK; - if (!Hash) { - if (end - start <= PAGE_SIZE) - _tlbie(start); - else - _tlbia(); - return; - } if (start >= end) return; end = (end - 1) | ~PAGE_MASK; @@ -105,28 +73,15 @@ static void flush_range(struct mm_struct *mm, unsigned long start, ++pmd; } } - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(hash__flush_range); /* * Flush all the (user) entries for the address space described by mm. */ -void flush_tlb_mm(struct mm_struct *mm) +void hash__flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (!Hash) { - _tlbia(); - return; - } - /* * It is safe to go down the mm's list of vmas when called * from dup_mmap, holding mmap_lock. It would also be safe from @@ -134,38 +89,18 @@ void flush_tlb_mm(struct mm_struct *mm) * but it seems dup_mmap is the only SMP case which gets here. */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); } -EXPORT_SYMBOL(flush_tlb_mm); +EXPORT_SYMBOL(hash__flush_tlb_mm); -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { struct mm_struct *mm; pmd_t *pmd; - if (!Hash) { - _tlbie(vmaddr); - return; - } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; pmd = pmd_off(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } -EXPORT_SYMBOL(flush_tlb_page); - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); -} -EXPORT_SYMBOL(flush_tlb_range); - -void __init early_init_mmu(void) -{ -} +EXPORT_SYMBOL(hash__flush_tlb_page); diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index fd393b8be14f..1b56d3af47d4 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -17,7 +17,7 @@ endif obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o +obj-$(CONFIG_PPC_PKEY) += pkeys.o # Instrumenting the SLB fault path can lead to duplicate SLB entries KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 22e787123cdf..7de1a8a0c62a 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -54,7 +54,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * PP bits. 
_PAGE_USER is already PP bit 0x2, so we only * need to add in 0x1 if it's a read-only user page */ - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 7084ce2951e6..998c6817ed47 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -72,7 +72,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * Handle the subpage protection bits */ subpg_pte = new_pte & ~subpg_prot; - rflags = htab_convert_pte_flags(subpg_pte); + rflags = htab_convert_pte_flags(subpg_pte, flags); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { @@ -260,7 +260,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index 440823797de7..c0fabe6c5a12 100644 --- a/arch/powerpc/mm/book3s64/hash_hugepage.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -57,7 +57,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))) return 0; - rflags = htab_convert_pte_flags(new_pmd); + rflags = htab_convert_pte_flags(new_pmd, flags); #if 0 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index 964467b3a776..b5e9fff8c217 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -70,7 +70,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)) return 0; - rflags = htab_convert_pte_flags(new_pte); + rflags = htab_convert_pte_flags(new_pte, flags); if (unlikely(mmu_psize == MMU_PAGE_16G)) offset = PTRS_PER_PUD; else diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index fd9c7f91b092..567e0c6b3978 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -443,7 +443,7 @@ void hash__mark_initmem_nx(void) start = (unsigned long)__init_begin; end = (unsigned long)__init_end; - pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); WARN_ON(!hash__change_memory_range(start, end, pp)); } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 24702c0a92e0..73b06adb6eeb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); #ifdef CONFIG_SPARSEMEM_VMEMMAP int mmu_vmemmap_psize = MMU_PAGE_4K; #endif @@ -186,7 +187,7 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { * - We make sure R is always set and never lost * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping 
*/ -unsigned long htab_convert_pte_flags(unsigned long pteflags) +unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags) { unsigned long rflags = 0; @@ -240,7 +241,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; - rflags |= pte_to_hpte_pkey_bits(pteflags); + rflags |= pte_to_hpte_pkey_bits(pteflags, flags); return rflags; } @@ -255,7 +256,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - prot = htab_convert_pte_flags(prot); + prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY); DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n", vstart, vend, pstart, prot, psize, ssize); @@ -845,7 +846,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) { int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); - WARN_ON(rc < 0); if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) pr_warn("Hash collision while resizing HPT\n"); @@ -1317,12 +1317,14 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; case IO_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); psize = mmu_io_psize; ssize = mmu_kernel_ssize; + flags |= HPTE_USE_KERNEL_KEY; break; default: /* @@ -1901,7 +1903,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 563faa10bb66..685d7bb3d26f 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -263,7 +263,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; /* Are there still mappings? 
*/ - if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) { ++mem->used; ret = -EBUSY; goto unlock_exit; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e18ae50a275c..5b3a3bae21aa 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -136,12 +136,18 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); } +/* + * At some point we should be able to get rid of + * pmd_mkhuge() and mk_huge_pmd() when we update all the + * other archs to mark the pmd huge in pfn_pmd() + */ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; - return pmd_set_protbits(__pmd(pmdv), pgprot); + + return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot)); } pmd_t mk_pmd(struct page *page, pgprot_t pgprot) diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index b1d091a97611..f1c6f264ed91 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -9,9 +9,12 @@ #include <asm/mmu_context.h> #include <asm/mmu.h> #include <asm/setup.h> +#include <asm/smp.h> + #include <linux/pkeys.h> #include <linux/of_fdt.h> + int num_pkey; /* Max number of pkeys supported */ /* * Keys marked in the reservation list cannot be allocated by userspace @@ -25,8 +28,8 @@ static u32 initial_allocation_mask __ro_after_init; * Even if we allocate keys with sys_pkey_alloc(), we need to make sure * other thread still find the access denied using the same keys. */ -static u64 default_amr = ~0x0UL; -static u64 default_iamr = 0x5555555555555555UL; +u64 default_amr __ro_after_init = ~0x0UL; +u64 default_iamr __ro_after_init = 0x5555555555555555UL; u64 default_uamor __ro_after_init; /* * Key used to implement PROT_EXEC mmap. Denies READ/WRITE @@ -89,12 +92,14 @@ static int scan_pkey_feature(void) } } +#ifdef CONFIG_PPC_MEM_KEYS /* * Adjust the upper limit, based on the number of bits supported by * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); +#endif return pkeys_total; } @@ -102,6 +107,7 @@ void __init pkey_early_init_devtree(void) { int pkeys_total, i; +#ifdef CONFIG_PPC_MEM_KEYS /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. @@ -117,7 +123,7 @@ void __init pkey_early_init_devtree(void) BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - +#endif /* * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ @@ -179,6 +185,27 @@ void __init pkey_early_init_devtree(void) default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } + if (unlikely(num_pkey <= 3)) { + /* + * Insufficient number of keys to support + * KUAP/KUEP feature. + */ + disable_kuep = true; + disable_kuap = true; + WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey); + } else { + /* handle key which is used by kernel for KAUP */ + reserved_allocation_mask |= (0x1 << 3); + /* + * Mark access for kup_key in default amr so that + * we continue to operate with that AMR in + * copy_to/from_user(). + */ + default_amr &= ~(0x3ul << pkeyshift(3)); + default_iamr &= ~(0x1ul << pkeyshift(3)); + default_uamor &= ~(0x3ul << pkeyshift(3)); + } + /* * Allow access for only key 0. 
And prevent any other modification. */ @@ -223,54 +250,92 @@ out: return; } -void pkey_mm_init(struct mm_struct *mm) +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) { - if (!mmu_has_feature(MMU_FTR_PKEY)) + if (disabled) return; - mm_pkey_allocation_map(mm) = initial_allocation_mask; - mm->context.execute_only_pkey = execute_only_key; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP; + } + + /* + * Radix always uses key0 of the IAMR to determine if an access is + * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction + * fetch. + */ + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + isync(); } +#endif -static inline u64 read_amr(void) +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled) { - return mfspr(SPRN_AMR); + if (disabled) + return; + /* + * On hash if PKEY feature is not enabled, disable KUAP too. + */ + if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP; + } + + /* + * Set the default kernel AMR values on all cpus. + */ + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); } +#endif -static inline void write_amr(u64 value) +static inline void update_current_thread_amr(u64 value) { - mtspr(SPRN_AMR, value); + current->thread.regs->amr = value; } -static inline u64 read_iamr(void) +static inline void update_current_thread_iamr(u64 value) { if (!likely(pkey_execute_disable_supported)) - return 0x0UL; + return; - return mfspr(SPRN_IAMR); + current->thread.regs->iamr = value; } -static inline void write_iamr(u64 value) +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_mm_init(struct mm_struct *mm) { - if (!likely(pkey_execute_disable_supported)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - - mtspr(SPRN_IAMR, value); + mm_pkey_allocation_map(mm) = initial_allocation_mask; + mm->context.execute_only_pkey = execute_only_key; } static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); - u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - write_amr(old_amr | new_amr_bits); + update_current_thread_amr(old_amr | new_amr_bits); } static inline void init_iamr(int pkey, u8 init_bits) { u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); - u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - write_iamr(old_iamr | new_iamr_bits); + update_current_thread_iamr(old_iamr | new_iamr_bits); } /* @@ -313,42 +378,6 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return 0; } -void thread_pkey_regs_save(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - /* - * TODO: Skip saving registers if @thread hasn't used any keys yet. 
- */ - thread->amr = read_amr(); - thread->iamr = read_iamr(); -} - -void thread_pkey_regs_restore(struct thread_struct *new_thread, - struct thread_struct *old_thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - if (old_thread->amr != new_thread->amr) - write_amr(new_thread->amr); - if (old_thread->iamr != new_thread->iamr) - write_iamr(new_thread->iamr); -} - -void thread_pkey_regs_init(struct thread_struct *thread) -{ - if (!mmu_has_feature(MMU_FTR_PKEY)) - return; - - thread->amr = default_amr; - thread->iamr = default_iamr; - - write_amr(default_amr); - write_iamr(default_iamr); -} - int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; @@ -397,9 +426,9 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) pkey_shift = pkeyshift(pkey); if (execute) - return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift)); - amr = read_amr(); + amr = current_thread_amr(); if (write) return !(amr & (AMR_WR_BIT << pkey_shift)); @@ -445,3 +474,5 @@ void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm); mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; } + +#endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3adcf730f478..98f0b243c1ab 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -589,48 +589,6 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Execution Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; - } - - /* - * Radix always uses key0 of the IAMR to determine if an access is - * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction - * fetch. - */ - mtspr(SPRN_IAMR, (1ul << 62)); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void setup_kuap(bool disabled) -{ - if (disabled || !early_radix_enabled()) - return; - - if (smp_processor_id() == boot_cpuid) { - pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; - } - - /* Make sure userspace can't change the AMR */ - mtspr(SPRN_UAMOR, 0); - - /* - * Set the default kernel AMR values on all cpus. - */ - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - isync(); -} -#endif - void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -721,6 +679,9 @@ void radix__early_init_mmu_secondary(void) radix__switch_mmu_context(NULL, &init_mm); tlbiel_all(); + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b487b489d4b6..fb66d154b26c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -56,14 +56,21 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) if (early_cpu_has_feature(CPU_FTR_HVMODE)) { /* MSR[HV] should flush partition scope translations first. 
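 *
 * [Editorial note, not part of the original patch: with the guard added
 *  below, CPU_FTR_ARCH_31 (ISA v3.1) parts only execute the single set-0
 *  call, roughly
 *      tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
 *  and the per-set RIC_FLUSH_TLB loop is skipped, because the loop is now
 *  wrapped in !early_cpu_has_feature(CPU_FTR_ARCH_31).]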
*/ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, + RIC_FLUSH_TLB, 0); + } } /* Flush process scoped entries. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + } ppc_after_tlbiel_barrier(); } @@ -300,9 +307,11 @@ static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) return; } - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_pid(pid, set, RIC_FLUSH_TLB); + } ppc_after_tlbiel_barrier(); asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c30fcbfa0e32..584567970c11 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -28,35 +28,8 @@ #include "internal.h" -enum slb_index { - LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - KSTACK_INDEX = 1, /* Kernel stack map */ -}; - static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); -#define slb_esid_mask(ssize) \ - (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) - -static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - enum slb_index index) -{ - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; -} - -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, - unsigned long flags) -{ - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); -} - bool stress_slb_enabled __initdata; static int __init parse_stress_slb(char *p) @@ -255,7 +228,6 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -265,34 +237,38 @@ void slb_dump_contents(struct slb_entry *slb_ptr) if (!e && !v) continue; - pr_err("%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx %s\n", i, e, v, + (e & SLB_ESID_V) ? "VALID" : "NOT VALID"); - if (!(e & SLB_ESID_V)) { - pr_err("\n"); + if (!(e & SLB_ESID_V)) continue; - } + llp = v & SLB_VSID_LLP; if (v & SLB_VSID_B_1T) { - pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID_1T(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp); } else { - pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", + pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", GET_ESID(e), (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp); } } - pr_err("----------------------------------\n"); - - /* Dump slb cache entires as well. 
*/ - pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); - pr_err("Valid SLB cache entries:\n"); - n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); - for (i = 0; i < n; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); - pr_err("Rest of SLB cache entries:\n"); - for (i = n; i < SLB_CACHE_ENTRIES; i++) - pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + /* RR is not so useful as it's often not used for allocation */ + pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr); + + /* Dump slb cache entires as well. */ + pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); + pr_err("Valid SLB cache entries:\n"); + n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); + for (i = 0; i < n; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + pr_err("Rest of SLB cache entries:\n"); + for (i = n; i < SLB_CACHE_ENTRIES; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + } } void slb_vmalloc_update(void) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0add963a849b..8961b44f350c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -210,28 +210,26 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return true; } - if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && - !search_exception_tables(regs->nip)) { - pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", - address, - from_kuid(&init_user_ns, current_uid())); - } - // Kernel fault on kernel address is bad if (address >= TASK_SIZE) return true; - // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad - if (!search_exception_tables(regs->nip)) - return true; + // Read/write fault blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, address, is_write)) { + pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n", + is_write ? "write" : "read", address, + from_kuid(&init_user_ns, current_uid())); - // Read/write fault in a valid region (the exception table search passed - // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, address, is_write)) - return true; + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. + return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read"); + } - // What's left? Kernel fault on user in well defined regions (extable - // matched), and allowed by KUAP in the faulting context. + // What's left? Kernel fault on user and allowed by KUAP in the faulting context. return false; } @@ -303,7 +301,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -320,6 +317,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. 
Any update to pte to reduce access will result in us @@ -354,10 +354,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based @@ -365,17 +361,19 @@ static void sanity_check_fault(bool is_write, bool is_user, */ #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) #define page_fault_is_write(__err) ((__err) & ESR_DST) -#define page_fault_is_bad(__err) (0) #else #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) -#if defined(CONFIG_PPC_8xx) +#endif + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#define page_fault_is_bad(__err) (0) +#elif defined(CONFIG_PPC_8xx) #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) #elif defined(CONFIG_PPC64) #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S) #else #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) #endif -#endif /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -547,10 +545,20 @@ NOKPROBE_SYMBOL(__do_page_fault); int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); int rc = __do_page_fault(regs, address, error_code); exception_exit(prev_state); - return rc; + if (likely(!rc)) + return 0; + + entry = search_exception_tables(regs->nip); + if (unlikely(!entry)) + return rc; + + instruction_pointer_set(regs, extable_fixup(entry)); + + return 0; } NOKPROBE_SYMBOL(do_page_fault); @@ -559,17 +567,10 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - const struct exception_table_entry *entry; int is_write = page_fault_is_write(regs->dsisr); - /* Are we prepared to handle this fault? */ - if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); - return; - } - /* kernel has accessed a bad area */ switch (TRAP(regs)) { @@ -603,3 +604,15 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } + +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? 
*/ + entry = search_exception_tables(instruction_pointer(regs)); + if (entry) + instruction_pointer_set(regs, extable_fixup(entry)); + else + __bad_page_fault(regs, address, sig); +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 36c3800769fb..8b3cc4d688e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -294,6 +294,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {} #endif +/* Return true when the entry to be freed maps more than the area being freed */ +static bool range_is_outside_limits(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + if ((start & mask) < floor) + return true; + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return true; + } + return end - 1 > ceiling - 1; +} + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -309,15 +324,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift > pdshift) num_hugepd = 1 << (shift - pdshift); - start &= pdmask; - if (start < floor) - return; - if (ceiling) { - ceiling &= pdmask; - if (! ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, pdmask)) return; for (i = 0; i < num_hugepd; i++, hpdp++) @@ -334,18 +341,9 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - unsigned long start = addr; pgtable_t token = pmd_pgtable(*pmd); - start &= PMD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PMD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK)) return; pmd_clear(pmd); @@ -395,20 +393,12 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, addr, next, floor, ceiling); } while (addr = next, addr != end); - start &= PUD_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PUD_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK)) return; - pmd = pmd_offset(pud, start); + pmd = pmd_offset(pud, start & PUD_MASK); pud_clear(pud); - pmd_free_tlb(tlb, pmd, start); + pmd_free_tlb(tlb, pmd, start & PUD_MASK); mm_dec_nr_pmds(tlb->mm); } @@ -446,20 +436,12 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, } } while (addr = next, addr != end); - start &= PGDIR_MASK; - if (start < floor) - return; - if (ceiling) { - ceiling &= PGDIR_MASK; - if (!ceiling) - return; - } - if (end - 1 > ceiling - 1) + if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK)) return; - pud = pud_offset(p4d, start); + pud = pud_offset(p4d, start & PGDIR_MASK); p4d_clear(p4d); - pud_free_tlb(tlb, pud, start); + pud_free_tlb(tlb, pud, start & PGDIR_MASK); mm_dec_nr_puds(tlb->mm); } diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 8e0d792ac296..3a82f89827a5 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(kernstart_addr); unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE; EXPORT_SYMBOL_GPL(kernstart_virt_addr); -static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); -static bool 
disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); +bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { @@ -47,12 +47,6 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); -void __ref setup_kup(void) -{ - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); -} - #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 25284fdb300c..afab328d0887 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,11 +54,7 @@ #include <mm/mmu_decl.h> -#ifndef CPU_FTR_COHERENT_ICACHE -#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ -#define CPU_FTR_NOEXECUTE 0 -#endif - +static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -116,46 +112,70 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } } -int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) +int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int rc; start = (unsigned long)__va(start); + mutex_lock(&linear_mapping_mutex); rc = create_section_mapping(start, start + size, nid, params->pgprot); + mutex_unlock(&linear_mapping_mutex); if (rc) { - pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } - - return __add_pages(nid, start_pfn, nr_pages, params); + return 0; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_linear_mapping(u64 start, u64 size) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - __remove_pages(start_pfn, nr_pages, altmap); - /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); - WARN_ON_ONCE(ret); + mutex_unlock(&linear_mapping_mutex); + if (ret) + pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n", + start, start + size, ret); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory */ vm_unmap_aliases(); } + +int __ref arch_add_memory(int nid, u64 start, u64 size, + struct mhp_params *params) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int rc; + + rc = arch_create_linear_mapping(nid, start, size, params); + if (rc) + return rc; + rc = __add_pages(nid, start_pfn, nr_pages, params); + if (rc) + arch_remove_linear_mapping(start, size); + return rc; +} + +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + __remove_pages(start_pfn, nr_pages, altmap); + arch_remove_linear_mapping(start, size); +} #endif #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -525,7 +545,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. 
*/ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 1b6d39e9baed..998810e68562 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -82,17 +82,12 @@ static inline void print_system_hash_info(void) {} #else /* CONFIG_PPC_MMU_NOHASH */ -extern void _tlbie(unsigned long address); -extern void _tlbia(void); - void print_system_hash_info(void); #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 -void hash_preload(struct mm_struct *mm, unsigned long ea); - extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); @@ -101,7 +96,6 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash; extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 231ca95f9ffb..19a3eec1d8c5 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); - if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_pin_tlb(block_mapped_ram, false); + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index 36bda962d3b3..03dacbe940e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -223,15 +223,9 @@ void flush_instruction_cache(void) { unsigned long tmp; - if (IS_ENABLED(CONFIG_E200)) { - tmp = mfspr(SPRN_L1CSR0); - tmp |= L1CSR0_CFI | L1CSR0_CLFC; - mtspr(SPRN_L1CSR0, tmp); - } else { - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - } + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); isync(); } diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index eaeee402f96e..68797e072f55 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -92,36 +92,25 @@ _GLOBAL(__tlbil_va) tlbsx. r6,0,r3 bne 10f sync -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. */ tlbwe r6,r6,PPC44x_TLB_PAGEID - isync -10: wrtee r10 - blr -2: -#ifdef CONFIG_PPC_47x +#else oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ +#endif /* !CONFIG_PPC_47x */ isync - wrtee r10 +10: wrtee r10 blr -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#ifndef CONFIG_PPC_47x li r3,0 sync @@ -136,8 +125,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) isync blr -2: -#ifdef CONFIG_PPC_47x +#else /* 476 variant. 
There's not simple way to do this, hopefully we'll * try to limit the amount of such full invalidates */ @@ -179,11 +167,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) b 1b /* Then loop */ 1: isync /* Sync shadows */ wrtee r11 -#else /* CONFIG_PPC_47x */ -1: trap - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; -#endif /* !CONFIG_PPC_47x */ blr +#endif /* !CONFIG_PPC_47x */ #ifdef CONFIG_PPC_47x diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 079159e97bca..e0ec67a16887 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) pg = pte_alloc_kernel(pd, va); else pg = early_pte_alloc_kernel(pd, va); - if (pg != 0) { + if (pg) { err = 0; /* The PTE should never be already set nor present in the * hash table @@ -112,10 +112,6 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); -#ifdef CONFIG_PPC_BOOK3S_32 - if (ktext) - hash_preload(&init_mm, v); -#endif v += PAGE_SIZE; p += PAGE_SIZE; } diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index e53c3c161257..f970d1510d3d 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,6 +153,8 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { + struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); + mpc8xx_pmu_read(event); /* If it was the last user, stop counting to avoid useles overhead */ @@ -164,22 +166,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) mtspr(SPRN_ICTRL, 7); break; case PERF_8xx_ID_ITLB_LOAD_MISS: - if (atomic_dec_return(&itlb_miss_ref) == 0) { - /* mfspr r10, SPRN_SPRG_SCRATCH0 */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0)); - + if (atomic_dec_return(&itlb_miss_ref) == 0) patch_instruction_site(&patch__itlbmiss_exit_1, insn); - } break; case PERF_8xx_ID_DTLB_LOAD_MISS: - if (atomic_dec_return(&dtlb_miss_ref) == 0) { - /* mfspr r10, SPRN_DAR */ - struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_DAR)); - + if (atomic_dec_return(&dtlb_miss_ref) == 0) patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - } break; } } diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index ae24d4a00da6..d6fa6e25234f 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, rc = copy_from_user_nofault(ret, ptr, size); - if (IS_ENABLED(CONFIG_PPC64) && rc) + if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) return read_user_stack_slow(ptr, ret, size); return rc; diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 64e4013d8060..b83c47b7947f 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -59,8 +59,8 @@ static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) { if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) return 1; - if (vdso32_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32)) return 1; 
return 0; } @@ -70,8 +70,8 @@ static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) if (nip == fp + offsetof(struct rt_signal_frame_32, uc.uc_mcontext.mc_pad)) return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32)) return 1; return 0; } diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index fed90e827f3a..8d0df4226328 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -21,7 +21,8 @@ /* * On 64-bit we don't want to invoke hash_page on user addresses from * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. + * to find which page (if any) is mapped and access it directly. Radix + * has no need for this so it doesn't use read_user_stack_slow. */ int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { @@ -67,8 +68,8 @@ static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) { if (nip == fp + offsetof(struct signal_frame_64, tramp)) return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + if (current->mm->context.vdso && + nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64)) return 1; return 0; } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6586f7e71cfb..28206b1fe172 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,6 +95,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 #define MMCRA_BHRB_DISABLE 0 +#define MMCR0_PMCCEXT 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -137,6 +138,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; @@ -250,11 +254,32 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs) static inline u32 perf_get_misc_flags(struct pt_regs *regs) { bool use_siar = regs_use_siar(regs); + unsigned long mmcra = regs->dsisr; + int marked = mmcra & MMCRA_SAMPLE_ENABLE; if (!use_siar) return perf_flags_from_msr(regs); /* + * Check the address in SIAR to identify the + * privilege levels since the SIER[MSR_HV, MSR_PR] + * bits are not set for marked events in power10 + * DD1. + */ + if (marked && (ppmu->flags & PPMU_P10_DD1)) { + unsigned long siar = mfspr(SPRN_SIAR); + if (siar) { + if (is_kernel_addr(siar)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } else { + if (is_kernel_addr(regs->nip)) + return PERF_RECORD_MISC_KERNEL; + return PERF_RECORD_MISC_USER; + } + } + + /* * If we don't have flags in MMCRA, rather than using * the MSR, we intuit the flags from the address in * SIAR which should give slightly more reliable @@ -350,7 +375,14 @@ static inline int siar_valid(struct pt_regs *regs) int marked = mmcra & MMCRA_SAMPLE_ENABLE; if (marked) { - if (ppmu->flags & PPMU_HAS_SIER) + /* + * SIER[SIAR_VALID] is not set for some + * marked events on power10 DD1, so drop + * the check for SIER[SIAR_VALID] and return true. 
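(The power10 DD1 fallback used in perf_get_misc_flags() above, and again for the instruction pointer further down, can be pulled out on its own as below. The helper name is made up for illustration; only mfspr(SPRN_SIAR), is_kernel_addr() and the PPMU_P10_DD1 flag from the hunks are assumed.)

/* power10 DD1: SIER[MSR_HV, MSR_PR] are unreliable for marked events,
 * so derive the privilege level from the sampled address instead,
 * falling back to regs->nip when SIAR reads as zero.
 */
static u32 p10_dd1_misc_flags(struct pt_regs *regs)
{
        unsigned long siar = mfspr(SPRN_SIAR);
        unsigned long addr = siar ? siar : regs->nip;

        return is_kernel_addr(addr) ? PERF_RECORD_MISC_KERNEL
                                    : PERF_RECORD_MISC_USER;
}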
+ */ + if (ppmu->flags & PPMU_P10_DD1) + return 0x1; + else if (ppmu->flags & PPMU_HAS_SIER) return regs->dar & SIER_SIAR_VALID; if (ppmu->flags & PPMU_SIAR_VALID) @@ -1242,6 +1274,9 @@ static void power_pmu_disable(struct pmu *pmu) val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); + /* Set mmcr0 PMCCEXT for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCR0_PMCCEXT; /* * The barrier is to make sure the mtspr has been @@ -1881,7 +1916,7 @@ static bool is_event_blacklisted(u64 ev) static int power_pmu_event_init(struct perf_event *event) { u64 ev; - unsigned long flags; + unsigned long flags, irq_flags; struct perf_event *ctrs[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int cflags[MAX_HWEVENTS]; @@ -1989,7 +2024,9 @@ static int power_pmu_event_init(struct perf_event *event) if (check_excludes(ctrs, cflags, n, 1)) return -EINVAL; - cpuhw = &get_cpu_var(cpu_hw_events); + local_irq_save(irq_flags); + cpuhw = this_cpu_ptr(&cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { @@ -2000,13 +2037,13 @@ static int power_pmu_event_init(struct perf_event *event) event->attr.branch_sample_type); if (bhrb_filter == -1) { - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); return -EOPNOTSUPP; } cpuhw->bhrb_filter = bhrb_filter; } - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); if (err) return -EINVAL; @@ -2125,6 +2162,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_event_update_userpage(event); /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && record) + if (is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + + /* * Finally record data if requested. 
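(power_pmu_event_init() above switches from get_cpu_var()/put_cpu_var() to local_irq_save() plus this_cpu_ptr() around the constraint check. The hunk does not spell out the motivation, but the effect of the new pattern is that no interrupt, including the PMU interrupt that uses the same per-cpu cpu_hw_events, can run on this CPU while the state is examined. A generic, self-contained sketch of the idiom with a made-up per-cpu structure:)

#include <linux/percpu.h>
#include <linux/irqflags.h>

struct scratch { int n_events; };
static DEFINE_PER_CPU(struct scratch, scratch);

static int count_local_events(void)
{
        struct scratch *s;
        unsigned long flags;
        int n;

        /* Disabling interrupts (not just preemption, as get_cpu_var()
         * would) keeps interrupt handlers off this CPU for the duration. */
        local_irq_save(flags);
        s = this_cpu_ptr(&scratch);
        n = s->n_events;
        local_irq_restore(flags);

        return n;
}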
*/ if (record) { @@ -2180,8 +2227,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { bool use_siar = regs_use_siar(regs); + unsigned long siar = mfspr(SPRN_SIAR); - if (use_siar && siar_valid(regs)) + if (ppmu->flags & PPMU_P10_DD1) { + if (siar) + return siar; + else + return regs->nip; + } else if (use_siar && siar_valid(regs)) return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); else if (use_siar) return 0; // no valid instruction pointer diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2848904df638..6ab5b272090a 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight) u64 sier = mfspr(SPRN_SIER); u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); + if (val == 0 || val == 7) *weight = 0; else @@ -311,9 +314,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { - mask |= CNST_L2L3_GROUP_MASK; - value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (unit == 6) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -339,12 +344,22 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mask |= CNST_RADIX_SCOPE_GROUP_MASK; + value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT); + } + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (event_is_threshold(event)) { + mask |= CNST_THRESH_CTL_SEL_MASK; + value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + } + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); @@ -456,6 +471,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } } + /* Set RADIX_SCOPE_QUAL bit */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) & + p10_EVENT_RADIX_SCOPE_QUAL_MASK; + mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT; + } + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; @@ -539,6 +561,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (!(pmc_inuse & 0x60)) mmcr->mmcr0 |= MMCR0_FC56; + /* + * Set mmcr0 (PMCCEXT) for p10 which + * will restrict access to group B registers + * when MMCR0 PMCC=0b00. 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcr->mmcr0 |= MMCR0_PMCCEXT; + mmcr->mmcr1 = mmcr1; mmcr->mmcra = mmcra; mmcr->mmcr2 = mmcr2; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 7025de5e60e7..454b32c31440 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -101,6 +101,9 @@ #define p10_EVENT_CACHE_SEL_MASK 0x3ull #define p10_EVENT_MMCR3_MASK 0x7fffull #define p10_EVENT_MMCR3_SHIFT 45 +#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9 +#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 +#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ @@ -112,6 +115,7 @@ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) /* @@ -125,9 +129,9 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | | | - * BHRB IFM -* | | | Count of events for each PMC. + * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | | + * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* @@ -145,6 +149,9 @@ #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) #define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) +#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) +#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) @@ -165,6 +172,9 @@ #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) #define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) +#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21) +#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. 
To achieve that we bias the @@ -221,6 +231,10 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) +#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul +#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\ + P10_MMCRA_THR_CTR_MANT_MASK) + /* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index 60c1b8111082..e45dafe818ed 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -15,6 +15,9 @@ EVENT(PM_EXEC_STALL, 0x30008); EVENT(PM_RUN_INST_CMPL, 0x500fa); EVENT(PM_BR_CMPL, 0x4d05e); EVENT(PM_BR_MPRED_CMPL, 0x400f6); +EVENT(PM_BR_FIN, 0x2f04a); +EVENT(PM_MPRED_BR_FIN, 0x3e098); +EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0); /* All L1 D cache load references counted at finish, gated by reject */ EVENT(PM_LD_REF_L1, 0x100fc); @@ -36,6 +39,12 @@ EVENT(PM_IC_PREF_REQ, 0x040a0); EVENT(PM_DATA_FROM_L3, 0x01340000001c040); /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x010000046080); +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x26880); +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PF_MISS_L3, 0x100000016080); /* Data PTEG reload */ EVENT(PM_DTLB_MISS, 0x300fc); /* ITLB Reloaded */ diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 9dbe8f9b89b4..79e0206ca454 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -23,10 +23,10 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] - * | | | | | | - * | | | | | *- mark - * | | | *- L1/L2/L3 cache_sel | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ] + * | | | | | | | + * | | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual * | | sdar_mode | * | *- sampling mode for marked events *- combine * | @@ -59,6 +59,7 @@ * * MMCR1[16] = cache_sel[0] * MMCR1[17] = cache_sel[1] + * MMCR1[18] = radix_scope_qual * * if mark: * MMCRA[63] = 1 (SAMPLE_ENABLE) @@ -113,6 +114,9 @@ GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN); CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); @@ -123,12 +127,15 @@ CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); -static struct attribute *power10_events_attr[] = { +static struct attribute *power10_events_attr_dd1[] = { GENERIC_EVENT_PTR(PM_RUN_CYC), 
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_CMPL), @@ -153,6 +160,39 @@ static struct attribute *power10_events_attr[] = { NULL }; +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_FIN), + GENERIC_EVENT_PTR(PM_MPRED_BR_FIN), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PF_MISS_L3), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group_dd1 = { + .name = "events", + .attrs = power10_events_attr_dd1, +}; + static struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, @@ -175,6 +215,7 @@ PMU_FORMAT_ATTR(src_sel, "config:45-46"); PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); +PMU_FORMAT_ATTR(radix_scope, "config:9"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -194,6 +235,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_invert_bit.attr, &format_attr_src_mask.attr, &format_attr_src_match.attr, + &format_attr_radix_scope.attr, NULL, }; @@ -202,13 +244,19 @@ static struct attribute_group power10_pmu_format_group = { .attrs = power10_pmu_format_attr, }; +static const struct attribute_group *power10_pmu_attr_groups_dd1[] = { + &power10_pmu_format_group, + &power10_pmu_events_group_dd1, + NULL, +}; + static const struct attribute_group *power10_pmu_attr_groups[] = { &power10_pmu_format_group, &power10_pmu_events_group, NULL, }; -static int power10_generic_events[] = { +static int power10_generic_events_dd1[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, @@ -217,6 +265,15 @@ static int power10_generic_events[] = { [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, }; +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN, +}; + static u64 power10_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; @@ -273,7 +330,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. 
*/ -static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PM_LD_REF_L1, @@ -374,6 +431,107 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_L2_ST, + [C(RESULT_MISS)] = PM_L2_ST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + #undef C static struct power_pmu power10_pmu = { @@ -403,6 +561,7 @@ static struct power_pmu power10_pmu = { int init_power10_pmu(void) { + unsigned int pvr; int rc; /* Comes from cpu_specs[] */ @@ -410,9 +569,20 @@ int init_power10_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) return -ENODEV; + pvr = mfspr(SPRN_PVR); + /* Add the ppmu flag for power10 DD1 */ + if ((PVR_CFG(pvr) == 1)) + power10_pmu.flags |= PPMU_P10_DD1; + /* Set the PERF_REG_EXTENDED_MASK here */ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + if ((PVR_CFG(pvr) == 1)) { + power10_pmu.generic_events = power10_generic_events_dd1; + power10_pmu.attr_groups = power10_pmu_attr_groups_dd1; + power10_pmu.cache_events = &power10_cache_events_dd1; + } + rc = register_power_pmu(&power10_pmu); if (rc) return rc; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 6aa8defb5857..8d6029099848 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -106,6 +106,7 @@ int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); 
} +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); static const char * const boards[] __initconst = { "fsl,P2041RDB", @@ -206,5 +207,3 @@ define_machine(corenet_generic) { .power_save = e500_idle, #endif }; - -machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index abb2b45b2789..60cc5b537a98 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -194,13 +194,6 @@ config PIN_TLB_IMMR CONFIG_PIN_TLB_DATA is also selected, it will reduce CONFIG_PIN_TLB_DATA to 24 Mbytes. -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y - help - This pins kernel text with 8M pages. - endmenu endmenu diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index aed4bc75f352..aef179fcbd4f 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c194c4ae8bc7..3ce907523b1e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -11,9 +11,6 @@ config PPC64 This option selects whether a 32-bit or a 64-bit kernel will be built. -config PPC_BOOK3S_32 - bool - menu "Processor support" choice prompt "Processor Type" @@ -23,20 +20,19 @@ choice The most common ones are the desktop and server CPUs (603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. - The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 + The other embedded parts, namely 4xx, 8xx and e500 (85xx) each form a family of their own that is not compatible with the others. If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. -config PPC_BOOK3S_6xx +config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" - select PPC_BOOK3S_32 - select PPC_FPU + imply PPC_FPU select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK if !ADB_PMU + select HAVE_ARCH_VMAP_STACK config PPC_85xx bool "Freescale 85xx" @@ -66,11 +62,24 @@ config 44x select HAVE_PCI select PHYS_64BIT -config E200 - bool "Freescale e200" - endchoice +config PPC_BOOK3S_603 + bool "Support for 603 SW loaded TLB" + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors based on the 603 cores. Those + processors don't have a HASH MMU and provide SW TLB loading. + +config PPC_BOOK3S_604 + bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603 + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors not based on the 603 cores. + Those processors have a HASH MMU. + choice prompt "Processor Type" depends on PPC64 @@ -218,9 +227,20 @@ config PPC_E500MC such as e5500/e6500), and must be disabled for running on e500v1 or e500v2. 
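(Several hunks in this series lean on IS_ENABLED() rather than #ifdef, the CONFIG_PPC_EARLY_DEBUG_CPM block added to cpm_load_patch() above being one example, so that both branches are always compiled and the dead one is discarded by the optimizer. A minimal sketch of the idiom, with CONFIG_FOO standing in for a real option:)

#include <linux/kconfig.h>

/* IS_ENABLED(CONFIG_FOO) evaluates to a constant 0 or 1, so the unused
 * branch is still parsed and type-checked but generates no code. */
static int pick_value(int a, int b)
{
        if (IS_ENABLED(CONFIG_FOO))
                return a;

        return b;
}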
-config PPC_FPU +config PPC_FPU_REGS bool + +config PPC_FPU + bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x default y if PPC64 + select PPC_FPU_REGS + help + This must be enabled to support the Floating Point Unit + Most 6xx have an FPU but e300c2 core (mpc832x) don't have + an FPU, so when building an embedded kernel for that target + you can disable FPU support. + + If unsure say Y. config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" @@ -247,12 +267,12 @@ config 4xx config BOOKE bool - depends on E200 || E500 || 44x || PPC_BOOK3E + depends on E500 || 44x || PPC_BOOK3E default y config FSL_BOOKE bool - depends on (E200 || E500) && PPC32 + depends on E500 && PPC32 default y # this is for common code between PPC32 & PPC64 FSL BOOKE @@ -317,7 +337,7 @@ config VSX config SPE_POSSIBLE def_bool y - depends on E200 || (E500 && !PPC_E500MC) + depends on E500 && !PPC_E500MC config SPE bool "SPE Support" @@ -395,6 +415,11 @@ config PPC_KUAP_DEBUG Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. +config PPC_PKEY + def_bool y + depends on PPC_BOOK3S_64 + depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION @@ -464,7 +489,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ + depends on 4xx || PPC_8xx || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 7e0f8ba6e54a..d497a60003d2 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -44,7 +44,8 @@ #define SL_TB 0xa0 #define SL_R2 0xa8 #define SL_CR 0xac -#define SL_R12 0xb0 /* r12 to r31 */ +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler) blr #else mflr r0 - stw r0,4(r1) - stwu r1,-SL_SIZE(r1) + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) mfcr r0 - stw r0,SL_CR(r1) - stw r2,SL_R2(r1) - stmw r12,SL_R12(r1) + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) /* Save MSR & SDR1 */ mfmsr r4 - stw r4,SL_MSR(r1) + stw r4,SL_MSR(r11) mfsdr1 r4 - stw r4,SL_SDR1(r1) + stw r4,SL_SDR1(r11) /* Get a stable timebase and save it */ 1: mftbu r4 - stw r4,SL_TB(r1) + stw r4,SL_TB(r11) mftb r5 - stw r5,SL_TB+4(r1) + stw r5,SL_TB+4(r11) mftbu r3 cmpw r3,r4 bne 1b /* Save SPRGs */ mfsprg r4,0 - stw r4,SL_SPRG0(r1) + stw r4,SL_SPRG0(r11) mfsprg r4,1 - stw r4,SL_SPRG0+4(r1) + stw r4,SL_SPRG0+4(r11) mfsprg r4,2 - stw r4,SL_SPRG0+8(r1) + stw r4,SL_SPRG0+8(r11) mfsprg r4,3 - stw r4,SL_SPRG0+12(r1) + stw r4,SL_SPRG0+12(r11) /* Save BATs */ mfdbatu r4,0 - stw r4,SL_DBAT0(r1) + stw r4,SL_DBAT0(r11) mfdbatl r4,0 - stw r4,SL_DBAT0+4(r1) + stw r4,SL_DBAT0+4(r11) mfdbatu r4,1 - stw r4,SL_DBAT1(r1) + stw r4,SL_DBAT1(r11) mfdbatl r4,1 - stw r4,SL_DBAT1+4(r1) + stw r4,SL_DBAT1+4(r11) mfdbatu r4,2 - stw r4,SL_DBAT2(r1) + stw r4,SL_DBAT2(r11) mfdbatl r4,2 - stw r4,SL_DBAT2+4(r1) + stw r4,SL_DBAT2+4(r11) mfdbatu r4,3 - stw r4,SL_DBAT3(r1) + stw r4,SL_DBAT3(r11) mfdbatl r4,3 - stw r4,SL_DBAT3+4(r1) + stw r4,SL_DBAT3+4(r11) mfibatu r4,0 - stw r4,SL_IBAT0(r1) + stw r4,SL_IBAT0(r11) mfibatl r4,0 - stw r4,SL_IBAT0+4(r1) + stw r4,SL_IBAT0+4(r11) mfibatu r4,1 - stw r4,SL_IBAT1(r1) + stw r4,SL_IBAT1(r11) mfibatl 
r4,1 - stw r4,SL_IBAT1+4(r1) + stw r4,SL_IBAT1+4(r11) mfibatu r4,2 - stw r4,SL_IBAT2(r1) + stw r4,SL_IBAT2(r11) mfibatl r4,2 - stw r4,SL_IBAT2+4(r1) + stw r4,SL_IBAT2+4(r11) mfibatu r4,3 - stw r4,SL_IBAT3(r1) + stw r4,SL_IBAT3(r11) mfibatl r4,3 - stw r4,SL_IBAT3+4(r1) + stw r4,SL_IBAT3+4(r11) BEGIN_MMU_FTR_SECTION mfspr r4,SPRN_DBAT4U - stw r4,SL_DBAT4(r1) + stw r4,SL_DBAT4(r11) mfspr r4,SPRN_DBAT4L - stw r4,SL_DBAT4+4(r1) + stw r4,SL_DBAT4+4(r11) mfspr r4,SPRN_DBAT5U - stw r4,SL_DBAT5(r1) + stw r4,SL_DBAT5(r11) mfspr r4,SPRN_DBAT5L - stw r4,SL_DBAT5+4(r1) + stw r4,SL_DBAT5+4(r11) mfspr r4,SPRN_DBAT6U - stw r4,SL_DBAT6(r1) + stw r4,SL_DBAT6(r11) mfspr r4,SPRN_DBAT6L - stw r4,SL_DBAT6+4(r1) + stw r4,SL_DBAT6+4(r11) mfspr r4,SPRN_DBAT7U - stw r4,SL_DBAT7(r1) + stw r4,SL_DBAT7(r11) mfspr r4,SPRN_DBAT7L - stw r4,SL_DBAT7+4(r1) + stw r4,SL_DBAT7+4(r11) mfspr r4,SPRN_IBAT4U - stw r4,SL_IBAT4(r1) + stw r4,SL_IBAT4(r11) mfspr r4,SPRN_IBAT4L - stw r4,SL_IBAT4+4(r1) + stw r4,SL_IBAT4+4(r11) mfspr r4,SPRN_IBAT5U - stw r4,SL_IBAT5(r1) + stw r4,SL_IBAT5(r11) mfspr r4,SPRN_IBAT5L - stw r4,SL_IBAT5+4(r1) + stw r4,SL_IBAT5+4(r11) mfspr r4,SPRN_IBAT6U - stw r4,SL_IBAT6(r1) + stw r4,SL_IBAT6(r11) mfspr r4,SPRN_IBAT6L - stw r4,SL_IBAT6+4(r1) + stw r4,SL_IBAT6+4(r11) mfspr r4,SPRN_IBAT7U - stw r4,SL_IBAT7(r1) + stw r4,SL_IBAT7(r11) mfspr r4,SPRN_IBAT7L - stw r4,SL_IBAT7+4(r1) + stw r4,SL_IBAT7+4(r11) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) /* Backup various CPU config stuffs */ @@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lis r5,grackle_wake_up@ha addi r5,r5,grackle_wake_up@l tophys(r5,r5) - stw r5,SL_PC(r1) + stw r5,SL_PC(r11) lis r4,KERNELBASE@h - tophys(r5,r1) + tophys(r5,r11) addi r5,r5,SL_PC lis r6,MAGIC@ha addi r6,r6,MAGIC@l @@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) tophys(r3,r3) stw r3,0x80(r4) stw r5,0x84(r4) - /* Store a pointer to our backup storage into - * a kernel global - */ - lis r3,sleep_storage@ha - addi r3,r3,sleep_storage@l - stw r5,0(r3) .globl low_cpu_offline_self low_cpu_offline_self: @@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up) lis r3,sleep_storage@ha addi r3,r3,sleep_storage@l tophys(r3,r3) - lwz r1,0(r3) + addi r1,r3,SL_PC /* Pass thru to older resume code ... 
*/ _ASM_NOKPROBE_SYMBOL(core99_wake_up) @@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blt 1b sync - /* restore the MSR and turn on the MMU */ - lwz r3,SL_MSR(r1) - bl turn_on_mmu - - /* get back the stack pointer */ - tovirt(r1,r1) - /* Restore TB */ li r3,0 mttbl r3 @@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtcr r0 lwz r2,SL_R2(r1) lmw r12,SL_R12(r1) - addi r1,r1,SL_SIZE - lwz r0,4(r1) - mtlr r0 - blr -_ASM_NOKPROBE_SYMBOL(grackle_wake_up) -turn_on_mmu: - mflr r4 - tovirt(r4,r4) + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) mtsrr0 r4 mtsrr1 r3 sync isync rfi -_ASM_NOKPROBE_SYMBOL(turn_on_mmu) +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ - .section .data + .section .bss .balign L1_CACHE_BYTES sleep_storage: - .long 0 + .space SL_SIZE .balign L1_CACHE_BYTES, 0 #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 938803eab0ad..619b093a0657 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -27,11 +27,11 @@ config OPAL_PRD recovery diagnostics on OpenPower machines config PPC_MEMTRACE - bool "Enable removal of RAM from kernel mappings for tracing" - depends on PPC_POWERNV && MEMORY_HOTREMOVE + bool "Enable runtime allocation of RAM for tracing" + depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC help - Enabling this option allows for the removal of memory (RAM) - from the kernel mappings to be used for hardware tracing. + Enabling this option allows for runtime allocation of memory (RAM) + for hardware tracing. config PPC_VAS bool "IBM Virtual Accelerator Switchboard (VAS)" diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1ed7c5286487..e6f461812856 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -589,6 +589,7 @@ struct p9_sprs { u64 spurr; u64 dscr; u64 wort; + u64 ciabr; u64 mmcra; u32 mmcr0; @@ -668,6 +669,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); sprs.wort = mfspr(SPRN_WORT); + sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); sprs.mmcr0 = mfspr(SPRN_MMCR0); @@ -785,6 +787,7 @@ core_woken: mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); mtspr(SPRN_WORT, sprs.wort); + mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); mtspr(SPRN_MMCR0, sprs.mmcr0); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 6828108486f8..5fc9408bb0b3 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -50,84 +51,52 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; -static int check_memblock_online(struct memory_block *mem, void *arg) +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) { - if (mem->state != MEM_ONLINE) - return -1; + unsigned long pfn; - return 0; -} - -static int change_memblock_state(struct memory_block *mem, void *arg) -{ - unsigned long state = (unsigned long)arg; - - mem->state = state; - - return 0; -} - -/* called with 
device_hotplug_lock held */ -static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) -{ - const unsigned long start = PFN_PHYS(start_pfn); - const unsigned long size = PFN_PHYS(nr_pages); - - if (walk_memory_blocks(start, size, NULL, check_memblock_online)) - return false; - - walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, - change_memblock_state); - - if (offline_pages(start_pfn, nr_pages)) { - walk_memory_blocks(start, size, (void *)MEM_ONLINE, - change_memblock_state); - return false; + /* As HIGHMEM does not apply, use clear_page() directly. */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); } - - walk_memory_blocks(start, size, (void *)MEM_OFFLINE, - change_memblock_state); - - - return true; } static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages, pfn; - u64 base_pfn; - u64 bytes = memory_block_size_bytes(); + const unsigned long nr_pages = PHYS_PFN(size); + unsigned long pfn, start_pfn; + struct page *page; - if (!node_spanned_pages(nid)) + /* + * Trace memory needs to be aligned to the size, which is guaranteed + * by alloc_contig_pages(). + */ + page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | + __GFP_NOWARN, nid, NULL); + if (!page) return 0; + start_pfn = page_to_pfn(page); - start_pfn = node_start_pfn(nid); - end_pfn = node_end_pfn(nid); - nr_pages = size >> PAGE_SHIFT; - - /* Trace memory needs to be aligned to the size */ - end_pfn = round_down(end_pfn - nr_pages, nr_pages); - - lock_device_hotplug(); - for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { - /* - * Remove memory in memory block size chunks so that - * iomem resources are always split to the same size and - * we never try to remove memory that spans two iomem - * resources. - */ - end_pfn = base_pfn + nr_pages; - for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { - __remove_memory(nid, pfn << PAGE_SHIFT, bytes); - } - unlock_device_hotplug(); - return base_pfn << PAGE_SHIFT; - } - } - unlock_device_hotplug(); + /* + * Clear the range while we still have a linear mapping. + * + * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. + */ + memtrace_clear_range(start_pfn, nr_pages); - return 0; + /* + * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, + * dumping, ...) should be touching these pages. 
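(The replacement memtrace_alloc_node() above is easier to follow with the removed lines stripped out; the added code boils down to four steps: allocate a size-aligned contiguous block on the node, zero it while it is still mapped, mark the pages offline, then drop it from the linear mapping. A condensed reading of the hunk, not a substitute for it:)

static u64 memtrace_alloc_node(u32 nid, u64 size)
{
        const unsigned long nr_pages = PHYS_PFN(size);
        unsigned long pfn, start_pfn;
        struct page *page;

        /* 1. size-aligned contiguous block on the requested node */
        page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
                                  __GFP_NOWARN, nid, NULL);
        if (!page)
                return 0;
        start_pfn = page_to_pfn(page);

        /* 2. zero it while the linear mapping still covers it */
        memtrace_clear_range(start_pfn, nr_pages);

        /* 3. tell hibernation/dumping code to leave these pages alone */
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
                __SetPageOffline(pfn_to_page(pfn));

        /* 4. unmap it so only the hardware tracer touches this RAM */
        arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);

        return PFN_PHYS(start_pfn);
}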
+ */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __SetPageOffline(pfn_to_page(pfn)); + + arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); + + return PFN_PHYS(start_pfn); } static int memtrace_init_regions_runtime(u64 size) @@ -197,16 +166,30 @@ static int memtrace_init_debugfs(void) return ret; } -static int online_mem_block(struct memory_block *mem, void *arg) +static int memtrace_free(int nid, u64 start, u64 size) { - return device_online(&mem->dev); + struct mhp_params params = { .pgprot = PAGE_KERNEL }; + const unsigned long nr_pages = PHYS_PFN(size); + const unsigned long start_pfn = PHYS_PFN(start); + unsigned long pfn; + int ret; + + ret = arch_create_linear_mapping(nid, start, size, ¶ms); + if (ret) + return ret; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __ClearPageOffline(pfn_to_page(pfn)); + + free_contig_range(start_pfn, nr_pages); + return 0; } /* - * Iterate through the chunks of memory we have removed from the kernel - * and attempt to add them back to the kernel. + * Iterate through the chunks of memory we allocated and attempt to expose + * them back to the kernel. */ -static int memtrace_online(void) +static int memtrace_free_regions(void) { int i, ret = 0; struct memtrace_entry *ent; @@ -214,7 +197,7 @@ static int memtrace_online(void) for (i = memtrace_array_nr - 1; i >= 0; i--) { ent = &memtrace_array[i]; - /* We have onlined this chunk previously */ + /* We have freed this chunk previously */ if (ent->nid == NUMA_NO_NODE) continue; @@ -224,30 +207,25 @@ static int memtrace_online(void) ent->mem = 0; } - if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { - pr_err("Failed to add trace memory to node %d\n", + if (memtrace_free(ent->nid, ent->start, ent->size)) { + pr_err("Failed to free trace memory on node %d\n", ent->nid); ret += 1; continue; } - lock_device_hotplug(); - walk_memory_blocks(ent->start, ent->size, NULL, - online_mem_block); - unlock_device_hotplug(); - /* - * Memory was added successfully so clean up references to it - * so on reentry we can tell that this chunk was added. + * Memory was freed successfully so clean up references to it + * so on reentry we can tell that this chunk was freed. */ debugfs_remove_recursive(ent->dir); - pr_info("Added trace memory back to node %d\n", ent->nid); + pr_info("Freed trace memory back on node %d\n", ent->nid); ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; - /* If all chunks of memory were added successfully, reset globals */ + /* If all chunks of memory were freed successfully, reset globals */ kfree(memtrace_array); memtrace_array = NULL; memtrace_size = 0; @@ -257,6 +235,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -269,25 +248,29 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } - /* Re-add/online previously removed/offlined memory */ - if (memtrace_size) { - if (memtrace_online()) - return -EAGAIN; - } + mutex_lock(&memtrace_mutex); - if (!val) - return 0; + /* Free all previously allocated memory. */ + if (memtrace_size && memtrace_free_regions()) + goto out_unlock; + + if (!val) { + rc = 0; + goto out_unlock; + } - /* Offline and remove memory */ + /* Allocate memory. 
*/ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index abeaa533b976..b711dc3262a3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index ecdad219d704..9105efcf242a 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -483,3 +483,117 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. + * ATSD LAUNCH register: write access initiates a shoot down to + * initiate the TLB Invalidate command. + */ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD + * LPARID register to the required value. 
+ */ + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem *arva) +{ + iounmap(arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + u64 val = 0ull; + int pend; + u8 size; + + if (!(arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with + * the necessary value + */ + val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the + * TLB Invalidate command + */ + val = PNV_OCXL_ATSD_LNCH_R; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + if (addr) + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; + } + val |= PNV_OCXL_ATSD_LNCH_PRS; + /* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ + size = 0b101; + if (page_size == 0x1000) + size = 0b000; + if (page_size == 0x200000) + size = 0b001; + if (page_size == 0x40000000) + size = 0b010; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); + out_be64(arva + PNV_OCXL_ATSD_LNCH, val); + + /* Poll the ATSD status register to determine when the + * TLB Invalidate has been completed. 
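(The three new exports above, pnv_ocxl_map_lpar(), pnv_ocxl_tlb_invalidate() and pnv_ocxl_unmap_lpar(), form a small API for the ocxl driver. A hypothetical caller, using only the signatures shown here and placeholder values, would look something like the sketch below; the page_size argument selects the AP encoding listed above, with 64K as the default.)

/* Hypothetical usage sketch - not taken from the ocxl driver itself. */
static int demo_atsd_shootdown(struct pci_dev *dev, u64 lparid, u64 lpcr,
                               unsigned long pid, unsigned long addr)
{
        void __iomem *arva;
        int rc;

        /* Bind an MMIO ATSD register set to the logical partition. */
        rc = pnv_ocxl_map_lpar(dev, lparid, lpcr, &arva);
        if (rc)
                return rc;

        /* Invalidate one 64K translation for this PID ... */
        pnv_ocxl_tlb_invalidate(arva, pid, addr, 0x10000);

        /* ... or pass addr == 0 to flush everything for the PID. */
        pnv_ocxl_tlb_invalidate(arva, pid, 0, 0x10000);

        pnv_ocxl_unmap_lpar(arva);
        return 0;
}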
+ */ + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + + while (pend) { + if (time_after_eq(jiffies, timeout)) { + pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", + __func__, val, pid); + return; + } + cpu_relax(); + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + } +} +EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37b380eef41a..5821b0fa8614 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -171,8 +171,8 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, opal_rc = opal_read_elog(__pa(elog->buffer), elog->size, elog->id); if (opal_rc != OPAL_SUCCESS) { - pr_err("ELOG: log read failed for log-id=%llx\n", - elog->id); + pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n", + elog->id); kfree(elog->buffer); elog->buffer = NULL; return -EIO; diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 3e1f064a18db..f0c1830deb51 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) "A hypervisor resource error occurred", "CAPP recovery process is in progress", }; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); /* Print things out */ if (hmi_evt->version < OpalHMIEvt_V1) { @@ -240,19 +242,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) break; } - printk("%s%s Hypervisor Maintenance interrupt [%s]\n", - level, sevstr, - hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? - hmi_error_types[hmi_evt->type] - : "Unknown"; - printk("%s Error detail: %s\n", level, error_info); - printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); - if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || - (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) - printk("%s TFMR: %016llx\n", level, + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) { + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, + be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + } if (hmi_evt->version < OpalHMIEvt_V2) return; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d95954ad4c0a..c61c3b62c8c6 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs) return 1; } -/* HMI exception handler called in virtual mode during check_irq_replay. */ +/* HMI exception handler called in virtual mode when irqs are next enabled. 
*/ int opal_handle_hmi_exception(struct pt_regs *regs) { /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b4ceb5e6ce4..c4f72cdc9b51 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2613,8 +2613,10 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; pdn = pci_get_pdn(dev); - if (!pdn || pdn->pe_number == IODA_INVALID_PE) + if (!pdn || pdn->pe_number == IODA_INVALID_PE) { + pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); return false; + } return true; } diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index c4434f20f42f..28aac933a439 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) { struct pnv_iov_data *iov; struct pnv_phb *phb; - unsigned int win; + int win; struct resource *res; int i, j; int64_t rc; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c62aaa29a9d5..b431f41c6cb5 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -382,7 +382,6 @@ static int ps3_system_bus_probe(struct device *_dev) static int ps3_system_bus_remove(struct device *_dev) { - int result = 0; struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); struct ps3_system_bus_driver *drv; @@ -393,13 +392,13 @@ static int ps3_system_bus_remove(struct device *_dev) BUG_ON(!drv); if (drv->remove) - result = drv->remove(dev); + drv->remove(dev); else dev_dbg(&dev->core, "%s:%d %s: no remove method\n", __func__, __LINE__, drv->core.name); pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); - return result; + return 0; } static void ps3_system_bus_shutdown(struct device *_dev) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a02012f1b04a..12cbffd3c2e3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -746,6 +746,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); + kfree(cpu_drcs); return -1; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7efe6ec5d14a..8377f1f7c78e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -479,7 +479,7 @@ static int dlpar_memory_remove_by_index(u32 drc_index) int lmb_found; int rc; - pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index); lmb_found = 0; for_each_drmem_lmb(lmb) { @@ -497,10 +497,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index) rc = -EINVAL; if (rc) - pr_info("Failed to hot-remove memory at %llx\n", - lmb->base_addr); + pr_debug("Failed to hot-remove memory at %llx\n", + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); + pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } @@ -717,8 +717,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) if (!drmem_lmb_reserved(lmb)) continue; - pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmb->base_addr, lmb->drc_index); + 
pr_debug("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); } rc = 0; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 2f73cb5bf12d..ea4d6a660e0d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -12,9 +12,11 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/kobject.h> +#include <linux/nmi.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/stat.h> +#include <linux/stop_machine.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/delay.h> @@ -59,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(__be32 phandle) +static int delete_dt_node(struct device_node *dn) { - struct device_node *dn; - - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) - return -ENOENT; - pr_debug("removing node %pOFfp\n", dn); - dlpar_detach_node(dn); - of_node_put(dn); return 0; } @@ -135,10 +129,9 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(__be32 phandle, s32 scope) +static int update_dt_node(struct device_node *dn, s32 scope) { struct update_props_workarea *upwa; - struct device_node *dn; struct property *prop = NULL; int i, rc, rtas_rc; char *prop_data; @@ -155,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (!dn) { - kfree(rtas_buf); - return -ENOENT; - } - upwa = (struct update_props_workarea *)&rtas_buf[0]; - upwa->phandle = phandle; + upwa->phandle = cpu_to_be32(dn->phandle); do { rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, @@ -208,11 +195,12 @@ static int update_dt_node(__be32 phandle, s32 scope) rc = update_dt_property(dn, &prop, prop_name, vd, prop_data); if (rc) { - printk(KERN_ERR "Could not update %s" - " property\n", prop_name); + pr_err("updating %s property failed: %d\n", + prop_name, rc); } prop_data += vd; + break; } cond_resched(); @@ -221,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope) cond_resched(); } while (rtas_rc == 1); - of_node_put(dn); kfree(rtas_buf); return 0; } -static int add_dt_node(__be32 parent_phandle, __be32 drc_index) +static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) { struct device_node *dn; - struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); - if (!parent_dn) - return -ENOENT; - dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) { - of_node_put(parent_dn); + if (!dn) return -ENOENT; - } rc = dlpar_attach_node(dn, parent_dn); if (rc) @@ -248,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) pr_debug("added node %pOFfp\n", dn); - of_node_put(parent_dn); return rc; } @@ -261,7 +240,7 @@ int pseries_devicetree_update(s32 scope) update_nodes_token = rtas_token("ibm,update-nodes"); if (update_nodes_token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; + return 0; rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); if (!rtas_buf) @@ -281,22 +260,31 @@ int pseries_devicetree_update(s32 scope) data++; for (i = 0; i < node_count; i++) { + struct device_node *np; __be32 phandle = *data++; __be32 drc_index; + np = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!np) { + pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n", + 
be32_to_cpu(phandle), action); + continue; + } + switch (action) { case DELETE_DT_NODE: - delete_dt_node(phandle); + delete_dt_node(np); break; case UPDATE_DT_NODE: - update_dt_node(phandle, scope); + update_dt_node(np, scope); break; case ADD_DT_NODE: drc_index = *data++; - add_dt_node(phandle, drc_index); + add_dt_node(np, drc_index); break; } + of_node_put(np); cond_resched(); } } @@ -311,21 +299,8 @@ int pseries_devicetree_update(s32 scope) void post_mobility_fixup(void) { int rc; - int activate_fw_token; - - activate_fw_token = rtas_token("ibm,activate-firmware"); - if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "Could not make post-mobility " - "activate-fw call.\n"); - return; - } - - do { - rc = rtas_call(activate_fw_token, 0, 1, NULL); - } while (rtas_busy_delay(rc)); - if (rc) - printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + rtas_activate_firmware(); /* * We don't want CPUs to go online/offline while the device @@ -342,8 +317,7 @@ void post_mobility_fixup(void) rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) - printk(KERN_ERR "Post-mobility device tree update " - "failed: %d\n", rc); + pr_err("device tree update failed: %d\n", rc); cacheinfo_rebuild(); @@ -358,6 +332,279 @@ void post_mobility_fixup(void) return; } +static int poll_vasi_state(u64 handle, unsigned long *res) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long hvrc; + int ret; + + hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle); + switch (hvrc) { + case H_SUCCESS: + ret = 0; + *res = retbuf[0]; + break; + case H_PARAMETER: + ret = -EINVAL; + break; + case H_FUNCTION: + ret = -EOPNOTSUPP; + break; + case H_HARDWARE: + default: + pr_err("unexpected H_VASI_STATE result %ld\n", hvrc); + ret = -EIO; + break; + } + return ret; +} + +static int wait_for_vasi_session_suspending(u64 handle) +{ + unsigned long state; + int ret; + + /* + * Wait for transition from H_VASI_ENABLED to + * H_VASI_SUSPENDING. Treat anything else as an error. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + if (ret != 0 || state == H_VASI_SUSPENDING) { + break; + } else if (state == H_VASI_ENABLED) { + ssleep(1); + } else { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + ret = -EIO; + break; + } + } + + /* + * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or + * ibm,suspend-me are also unimplemented, we'll recover then. + */ + if (ret == -EOPNOTSUPP) + ret = 0; + + return ret; +} + +static void prod_single(unsigned int target_cpu) +{ + long hvrc; + int hwid; + + hwid = get_hard_smp_processor_id(target_cpu); + hvrc = plpar_hcall_norets(H_PROD, hwid); + if (hvrc == H_SUCCESS) + return; + pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n", + target_cpu, hwid, hvrc); +} + +static void prod_others(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + prod_single(cpu); + } +} + +static u16 clamp_slb_size(void) +{ + u16 prev = mmu_slb_size; + + slb_set_size(SLB_MIN_SIZE); + + return prev; +} + +static int do_suspend(void) +{ + u16 saved_slb_size; + int status; + int ret; + + pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id()); + + /* + * The destination processor model may have fewer SLB entries + * than the source. We reduce mmu_slb_size to a safe minimum + * before suspending in order to minimize the possibility of + * programming non-existent entries on the destination. If + * suspend fails, we restore it before returning. 
On success + * the OF reconfig path will update it from the new device + * tree after resuming on the destination. + */ + saved_slb_size = clamp_slb_size(); + + ret = rtas_ibm_suspend_me(&status); + if (ret != 0) { + pr_err("ibm,suspend-me error: %d\n", status); + slb_set_size(saved_slb_size); + } + + return ret; +} + +static int do_join(void *arg) +{ + atomic_t *counter = arg; + long hvrc; + int ret; + + /* Must ensure MSR.EE off for H_JOIN. */ + hard_irq_disable(); + hvrc = plpar_hcall_norets(H_JOIN); + + switch (hvrc) { + case H_CONTINUE: + /* + * All other CPUs are offline or in H_JOIN. This CPU + * attempts the suspend. + */ + ret = do_suspend(); + break; + case H_SUCCESS: + /* + * The suspend is complete and this cpu has received a + * prod. + */ + ret = 0; + break; + case H_BAD_MODE: + case H_HARDWARE: + default: + ret = -EIO; + pr_err_ratelimited("H_JOIN error %ld on CPU %i\n", + hvrc, smp_processor_id()); + break; + } + + if (atomic_inc_return(counter) == 1) { + pr_info("CPU %u waking all threads\n", smp_processor_id()); + prod_others(); + } + /* + * Execution may have been suspended for several seconds, so + * reset the watchdog. + */ + touch_nmi_watchdog(); + return ret; +} + +/* + * Abort reason code byte 0. We use only the 'Migrating partition' value. + */ +enum vasi_aborting_entity { + ORCHESTRATOR = 1, + VSP_SOURCE = 2, + PARTITION_FIRMWARE = 3, + PLATFORM_FIRMWARE = 4, + VSP_TARGET = 5, + MIGRATING_PARTITION = 6, +}; + +static void pseries_cancel_migration(u64 handle, int err) +{ + u32 reason_code; + u32 detail; + u8 entity; + long hvrc; + + entity = MIGRATING_PARTITION; + detail = abs(err) & 0xffffff; + reason_code = (entity << 24) | detail; + + hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle, + H_VASI_SIGNAL_CANCEL, reason_code); + if (hvrc) + pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); +} + +static int pseries_suspend(u64 handle) +{ + const unsigned int max_attempts = 5; + unsigned int retry_interval_ms = 1; + unsigned int attempt = 1; + int ret; + + while (true) { + atomic_t counter = ATOMIC_INIT(0); + unsigned long vasi_state; + int vasi_err; + + ret = stop_machine(do_join, &counter, cpu_online_mask); + if (ret == 0) + break; + /* + * Encountered an error. If the VASI stream is still + * in Suspending state, it's likely a transient + * condition related to some device in the partition + * and we can retry in the hope that the cause has + * cleared after some delay. + * + * A better design would allow drivers etc to prepare + * for the suspend and avoid conditions which prevent + * the suspend from succeeding. For now, we have this + * mitigation. 
+ */ + pr_notice("Partition suspend attempt %u of %u error: %d\n", + attempt, max_attempts, ret); + + if (attempt == max_attempts) + break; + + vasi_err = poll_vasi_state(handle, &vasi_state); + if (vasi_err == 0) { + if (vasi_state != H_VASI_SUSPENDING) { + pr_notice("VASI state %lu after failed suspend\n", + vasi_state); + break; + } + } else if (vasi_err != -EOPNOTSUPP) { + pr_err("VASI state poll error: %d", vasi_err); + break; + } + + pr_notice("Will retry partition suspend after %u ms\n", + retry_interval_ms); + + msleep(retry_interval_ms); + retry_interval_ms *= 10; + attempt++; + } + + return ret; +} + +static int pseries_migrate_partition(u64 handle) +{ + int ret; + + ret = wait_for_vasi_session_suspending(handle); + if (ret) + return ret; + + ret = pseries_suspend(handle); + if (ret == 0) + post_mobility_fixup(); + else + pseries_cancel_migration(handle, ret); + + return ret; +} + +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return pseries_migrate_partition(handle); +} + static ssize_t migration_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) @@ -369,17 +616,10 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - do { - rc = rtas_ibm_suspend_me(streamid); - if (rc == -EAGAIN) - ssleep(1); - } while (rc == -EAGAIN); - + rc = pseries_migrate_partition(streamid); if (rc) return rc; - post_mobility_fixup(); - return count; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 911534b89c85..72a4d4167849 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -290,6 +290,25 @@ static void fixup_winbond_82c105(struct pci_dev* dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_winbond_82c105); +static enum pci_bus_speed prop_to_pci_speed(u32 prop) +{ + switch (prop) { + case 0x01: + return PCIE_SPEED_2_5GT; + case 0x02: + return PCIE_SPEED_5_0GT; + case 0x04: + return PCIE_SPEED_8_0GT; + case 0x08: + return PCIE_SPEED_16_0GT; + case 0x10: + return PCIE_SPEED_32_0GT; + default: + pr_debug("Unexpected PCI link speed property value\n"); + return PCI_SPEED_UNKNOWN; + } +} + int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) { struct device_node *dn, *pdn; @@ -322,35 +341,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; } - switch (pcie_link_speed_stats[0]) { - case 0x01: - bus->max_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->max_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->max_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->max_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - - switch (pcie_link_speed_stats[1]) { - case 0x01: - bus->cur_bus_speed = PCIE_SPEED_2_5GT; - break; - case 0x02: - bus->cur_bus_speed = PCIE_SPEED_5_0GT; - break; - case 0x04: - bus->cur_bus_speed = PCIE_SPEED_8_0GT; - break; - default: - bus->cur_bus_speed = PCI_SPEED_UNKNOWN; - break; - } - + bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]); + bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]); return 0; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b2b245b25edb..149cec2212e6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,8 +526,11 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) #ifdef CONFIG_PPC_BOOK3S_64 if (disposition == RTAS_DISP_NOT_RECOVERED) { switch (error_type) { - case 
MC_ERROR_TYPE_SLB: case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 92922491a81c..c70b4be9f0a5 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -42,6 +42,7 @@ #include <asm/plpar_wrappers.h> #include <asm/code-patching.h> #include <asm/svm.h> +#include <asm/kvm_guest.h> #include "pseries.h" @@ -210,7 +211,7 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (is_kvm_guest()) { + if (check_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 81e0ac58d620..1b902cbf85c5 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,13 +13,8 @@ #include <asm/mmu.h> #include <asm/rtas.h> #include <asm/topology.h> -#include "../../kernel/cacheinfo.h" -static u64 stream_id; static struct device suspend_dev; -static DECLARE_COMPLETION(suspend_work); -static struct rtas_suspend_me_data suspend_data; -static atomic_t suspending; /** * pseries_suspend_begin - First phase of hibernation @@ -29,7 +24,7 @@ static atomic_t suspending; * Return value: * 0 on success / other on failure **/ -static int pseries_suspend_begin(suspend_state_t state) +static int pseries_suspend_begin(u64 stream_id) { long vasi_state, rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -49,41 +44,10 @@ static int pseries_suspend_begin(suspend_state_t state) vasi_state); return -EIO; } - - return 0; -} - -/** - * pseries_suspend_cpu - Suspend a single CPU - * - * Makes the H_JOIN call to suspend the CPU - * - **/ -static int pseries_suspend_cpu(void) -{ - if (atomic_read(&suspending)) - return rtas_suspend_cpu(&suspend_data); return 0; } /** - * pseries_suspend_enable_irqs - * - * Post suspend configuration updates - * - **/ -static void pseries_suspend_enable_irqs(void) -{ - /* - * Update configuration which can be modified based on device tree - * changes during resume. 
- */ - cacheinfo_cpu_offline(smp_processor_id()); - post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); -} - -/** * pseries_suspend_enter - Final phase of hibernation * * Return value: @@ -91,28 +55,7 @@ static void pseries_suspend_enable_irqs(void) **/ static int pseries_suspend_enter(suspend_state_t state) { - int rc = rtas_suspend_last_cpu(&suspend_data); - - atomic_set(&suspending, 0); - atomic_set(&suspend_data.done, 1); - return rc; -} - -/** - * pseries_prepare_late - Prepare to suspend all other CPUs - * - * Return value: - * 0 on success / other on failure - **/ -static int pseries_prepare_late(void) -{ - atomic_set(&suspending, 1); - atomic_set(&suspend_data.working, 0); - atomic_set(&suspend_data.done, 0); - atomic_set(&suspend_data.error, 0); - suspend_data.complete = &suspend_work; - reinit_completion(&suspend_work); - return 0; + return rtas_ibm_suspend_me(NULL); } /** @@ -132,6 +75,7 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + u64 stream_id; int rc; if (!capable(CAP_SYS_ADMIN)) @@ -140,7 +84,7 @@ static ssize_t store_hibernate(struct device *dev, stream_id = simple_strtoul(buf, NULL, 16); do { - rc = pseries_suspend_begin(PM_SUSPEND_MEM); + rc = pseries_suspend_begin(stream_id); if (rc == -EAGAIN) ssleep(1); } while (rc == -EAGAIN); @@ -148,10 +92,11 @@ static ssize_t store_hibernate(struct device *dev, if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - stream_id = 0; - - if (!rc) + if (!rc) { rc = count; + post_mobility_fixup(); + } + return rc; } @@ -187,8 +132,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, - .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; @@ -231,15 +174,9 @@ static int __init pseries_suspend_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; - suspend_data.token = rtas_token("ibm,suspend-me"); - if (suspend_data.token == RTAS_UNKNOWN_SERVICE) - return 0; - if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) return rc; - ppc_md.suspend_disable_cpu = pseries_suspend_cpu; - ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be40..36ec0bdd8b63 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. 
*/ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a80440af491a..595310e056f4 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -200,10 +200,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) offset |= XIVE_ESB_LD_ST_MO; - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); else @@ -214,10 +210,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) { - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) xive_ops->esb_rw(xd->hw_irq, offset, data, 1); else @@ -356,50 +348,40 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) * EOI an interrupt at the source. There are several methods * to do this depending on the HW version and source type */ -static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) +static void xive_do_source_eoi(struct xive_irq_data *xd) { + u8 eoi_val; + xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { - /* - * The FW told us to call it. This happens for some - * interrupt sources that need additional HW whacking - * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 needed a latch to be clared in the LPC bridge - * itself. The Firmware will take care of it. - */ - if (WARN_ON_ONCE(!xive_ops->eoi)) - return; - xive_ops->eoi(hw_irq); - } else { - u8 eoi_val; + return; + } - /* - * Otherwise for EOI, we use the special MMIO that does - * a clear of both P and Q and returns the old Q, - * except for LSIs where we use the "EOI cycle" special - * load. - * - * This allows us to then do a re-trigger if Q was set - * rather than synthesizing an interrupt in software - * - * For LSIs the HW EOI cycle is used rather than PQ bits, - * as they are automatically re-triggred in HW when still - * pending. - */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_esb_read(xd, XIVE_ESB_LOAD_EOI); - else { - eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); - DBG_VERBOSE("eoi_val=%x\n", eoi_val); - - /* Re-trigger if needed */ - if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) - out_be64(xd->trig_mmio, 0); - } + /* + * For LSIs, we use the "EOI cycle" special load rather than + * PQ bits, as they are automatically re-triggered in HW when + * still pending. + */ + if (xd->flags & XIVE_IRQ_FLAG_LSI) { + xive_esb_read(xd, XIVE_ESB_LOAD_EOI); + return; } + + /* + * Otherwise, we use the special MMIO that does a clear of + * both P and Q and returns the old Q. 
This allows us to then + * do a re-trigger if Q was set rather than synthesizing an + * interrupt in software + */ + eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + DBG_VERBOSE("eoi_val=%x\n", eoi_val); + + /* Re-trigger if needed */ + if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) + out_be64(xd->trig_mmio, 0); } /* irq_chip eoi callback, called with irq descriptor lock held */ @@ -416,8 +398,8 @@ static void xive_irq_eoi(struct irq_data *d) * been passed-through to a KVM guest */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && - !(xd->flags & XIVE_IRQ_NO_EOI)) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) + xive_do_source_eoi(xd); else xd->stale_p = true; @@ -432,9 +414,7 @@ static void xive_irq_eoi(struct irq_data *d) } /* - * Helper used to mask and unmask an interrupt source. This - * is only called for normal interrupts that do not require - * masking/unmasking via firmware. + * Helper used to mask and unmask an interrupt source. */ static void xive_do_source_set_mask(struct xive_irq_data *xd, bool mask) @@ -681,20 +661,6 @@ static void xive_irq_unmask(struct irq_data *d) pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call FW to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - xive_irq_priority, d->irq); - return; - } - xive_do_source_set_mask(xd, false); } @@ -704,20 +670,6 @@ static void xive_irq_mask(struct irq_data *d) pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call OPAL to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - 0xff, d->irq); - return; - } - xive_do_source_set_mask(xd, true); } @@ -837,14 +789,7 @@ static int xive_irq_retrigger(struct irq_data *d) * 11, then perform an EOI. */ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - - /* - * Note: We pass "0" to the hw_irq argument in order to - * avoid calling into the backend EOI code which we don't - * want to do in the case of a re-trigger. Backends typically - * only do EOI for LSIs anyway. - */ - xive_do_source_eoi(0, xd); + xive_do_source_eoi(xd); return 1; } @@ -861,13 +806,6 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) u8 pq; /* - * We only support this on interrupts that do not require - * firmware calls for masking and unmasking - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) - return -EIO; - - /* * This is called by KVM with state non-NULL for enabling * pass-through or NULL for disabling it */ @@ -966,7 +904,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * while masked, the generic code will re-mask it anyway. 
*/ if (!xd->saved_p) - xive_do_source_eoi(hw_irq, xd); + xive_do_source_eoi(xd); } return 0; @@ -1110,7 +1048,7 @@ static void xive_ipi_eoi(struct irq_data *d) DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); + xive_do_source_eoi(&xc->ipi_data); xive_do_queue_eoi(xc); } @@ -1142,7 +1080,7 @@ static void __init xive_request_ipi(void) return; /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, 0); + virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); xive_ipi_irq = virq; WARN_ON(request_irq(virq, xive_muxed_ipi_action, @@ -1242,7 +1180,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, #ifdef CONFIG_SMP /* IPIs are special and come up with HW number 0 */ - if (hw == 0) { + if (hw == XIVE_IPI_HW_IRQ) { /* * IPIs are marked per-cpu. We use separate HW interrupts under * the hood but associated with the same "linux" interrupt @@ -1271,7 +1209,7 @@ static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) if (!data) return; hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_irq_free_data(virq); } @@ -1303,16 +1241,71 @@ static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node, return xive_ops->match(node); } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" }; + +static const struct { + u64 mask; + char *name; +} xive_irq_flags[] = { + { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, + { XIVE_IRQ_FLAG_LSI, "LSI" }, + { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, + { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, +}; + +static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct xive_irq_data *xd; + u64 val; + int i; + + /* No IRQ domain level information. To be done */ + if (!irqd) + return; + + if (!is_xive_irq(irq_data_get_irq_chip(irqd))) + return; + + seq_printf(m, "%*sXIVE:\n", ind, ""); + ind++; + + xd = irq_data_get_irq_handler_data(irqd); + if (!xd) { + seq_printf(m, "%*snot assigned\n", ind, ""); + return; + } + + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]); + seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "", + xd->saved_p ? 
"saved" : ""); + seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target); + seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip); + seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page); + seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page); + seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags); + for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) { + if (xd->flags & xive_irq_flags[i].mask) + seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name); + } +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, .xlate = xive_irq_domain_xlate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = xive_irq_domain_debug_show, +#endif }; -static void __init xive_init_host(void) +static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ, + xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; @@ -1421,7 +1414,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. */ - if (d->domain != xive_irq_domain || hw_irq == 0) + if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) continue; /* @@ -1446,7 +1439,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * still asserted. Otherwise do an MSI retrigger. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xive_irq_retrigger(d); @@ -1513,8 +1506,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio) { xive_tima = area; xive_tima_offset = offset; @@ -1525,7 +1518,7 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o __xive_enabled = true; pr_devel("Initializing host..\n"); - xive_init_host(); + xive_init_host(np); pr_devel("Initializing boot CPU..\n"); @@ -1655,7 +1648,7 @@ static int xive_core_debug_show(struct seq_file *m, void *private) hw_irq = (unsigned int)irqd_to_hwirq(d); /* IPIs are special (HW number 0) */ - if (hw_irq) + if (hw_irq != XIVE_IPI_HW_IRQ) xive_debug_show_irq(m, hw_irq, d); } return 0; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index cb58ec7ce77a..05a800a3104e 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -64,12 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG) - data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG; - if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_MASK_FW; - if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); data->trig_page = be64_to_cpu(trig_page); data->esb_shift = be32_to_cpu(esb_shift); @@ -128,6 +122,8 @@ static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, return rc == 0 ? 0 : -ENXIO; } +#define vp_err(vp, fmt, ...) 
pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__) + /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, __be32 *qpage, u32 order, bool can_escalate) @@ -155,7 +151,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, &esc_irq_be, NULL); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc); rc = -EIO; goto fail; } @@ -178,7 +174,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, msleep(OPAL_BUSY_DELAY_MS); } if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc); rc = -EIO; } else { /* @@ -205,7 +201,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) msleep(OPAL_BUSY_DELAY_MS); } if (rc) - pr_err("Error %lld disabling queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc); } void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) @@ -384,15 +380,6 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } -static void xive_native_eoi(u32 hw_irq) -{ - /* - * Not normally used except if specific interrupts need - * a workaround on EOI. - */ - opal_int_eoi(hw_irq); -} - static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -475,7 +462,6 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, - .eoi = xive_native_eoi, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, @@ -622,7 +608,7 @@ bool __init xive_native_init(void) xive_native_setup_pools(); /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, + if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); return false; @@ -714,6 +700,8 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to enable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_enable_vp); @@ -728,6 +716,8 @@ int xive_native_disable_vp(u32 vp_id) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to disable VP : %lld\n", rc); return rc ? 
-EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_disable_vp); @@ -739,8 +729,10 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) s64 rc; rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be); - if (rc) + if (rc) { + vp_err(vp_id, "Failed to get VP info : %lld\n", rc); return -EIO; + } *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu; *out_chip_id = be32_to_cpu(vp_chip_id_be); @@ -771,8 +763,7 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, &qeoi_page, &escalate_irq, &qflags); if (rc) { - pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc); return -EIO; } @@ -800,8 +791,7 @@ int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, &opal_qindex); if (rc) { - pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc); return -EIO; } @@ -820,8 +810,7 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); if (rc) { - pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc); return -EIO; } @@ -843,8 +832,7 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) rc = opal_xive_get_vp_state(vp_id, &state); if (rc) { - pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", - vp_id, rc); + vp_err(vp_id, "failed to get vp state : %lld\n", rc); return -EIO; } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 1e3674d7ea7b..01ccc0786ada 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -628,11 +628,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc) } } -static void xive_spapr_eoi(u32 hw_irq) -{ - /* Not used */; -} - static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { /* Only some debug on the TIMA settings */ @@ -677,7 +672,6 @@ static const struct xive_ops xive_spapr_ops = { .match = xive_spapr_match, .shutdown = xive_spapr_shutdown, .update_pending = xive_spapr_update_pending, - .eoi = xive_spapr_eoi, .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, @@ -857,7 +851,7 @@ bool __init xive_spapr_init(void) } /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) return false; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index b7b901da2168..9cf57c722faa 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,8 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ + /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this @@ -50,7 +52,6 @@ struct xive_ops { void (*shutdown)(void); void (*update_pending)(struct xive_cpu *xc); - void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP @@ -61,8 +62,8 
@@ struct xive_ops { const char *name; }; -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio); +bool xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); int xive_core_debug_init(void); diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229..9b0d85bff021 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 55c43a6c9111..dcd817ca2edf 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,6 +1383,7 @@ static long check_bp_loc(unsigned long addr) return 1; } +#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1394,6 +1395,7 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } +#endif static void print_data_bpts(void) { @@ -1745,9 +1747,9 @@ static void print_bug_trap(struct pt_regs *regs) #ifdef CONFIG_DEBUG_BUGVERBOSE printf("kernel BUG at %s:%u!\n", - bug->file, bug->line); + (char *)bug + bug->file_disp, bug->line); #else - printf("kernel BUG at %px!\n", (void *)bug->bug_addr); + printf("kernel BUG at %px!\n", (void *)bug + bug->bug_addr_disp); #endif #endif /* CONFIG_BUG */ } diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 7b55811c2a81..ba3ece56cbb3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -507,7 +507,7 @@ fail: return error; } -static int ps3disk_remove(struct ps3_system_bus_device *_dev) +static void ps3disk_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); @@ -526,7 +526,6 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) kfree(dev->bounce_buf); kfree(priv); ps3_system_bus_set_drvdata(_dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3disk = { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1088798c8dd0..b71d28372ef3 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -797,7 +797,7 @@ fail: return error; } -static int ps3vram_remove(struct ps3_system_bus_device *dev) +static void ps3vram_remove(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -817,7 +817,6 @@ static int ps3vram_remove(struct ps3_system_bus_device *dev) free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE)); kfree(priv); ps3_system_bus_set_drvdata(dev, NULL); - return 0; } static struct ps3_system_bus_driver ps3vram = { diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 1a07fee33f66..23871cde41fb 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -403,7 +403,7 @@ fail: return error; } -static int ps3flash_remove(struct ps3_system_bus_device *_dev) +static void ps3flash_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); @@ -413,7 +413,6 @@ static int ps3flash_remove(struct ps3_system_bus_device *_dev) kfree(ps3_system_bus_get_drvdata(&dev->sbd)); ps3_system_bus_set_drvdata(&dev->sbd, NULL); ps3flash_dev = NULL; - return 0; } 
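The ps3disk, ps3vram and ps3flash hunks above all convert the PS3 system-bus remove callback from int to void, so a teardown failure can no longer be propagated to the bus core and has to be handled (or logged) in the driver itself. A minimal sketch of the resulting driver shape, with hypothetical names (my_ps3_probe, my_ps3_remove) and a placeholder match_id, not taken from the patch:

#include <linux/module.h>
#include <asm/ps3.h>

static int my_ps3_probe(struct ps3_system_bus_device *dev)
{
	/* allocate resources, register with the relevant subsystem */
	return 0;
}

static void my_ps3_remove(struct ps3_system_bus_device *dev)
{
	/*
	 * void return: any teardown failure can only be logged here,
	 * it can no longer be reported back to the bus core.
	 */
}

static struct ps3_system_bus_driver my_ps3_driver = {
	.match_id	= PS3_MATCH_ID_STOR_DISK,	/* placeholder */
	.core.name	= "my-ps3-driver",
	.core.owner	= THIS_MODULE,
	.probe		= my_ps3_probe,
	.remove		= my_ps3_remove,
	.shutdown	= my_ps3_remove,
};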
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 7727bfd32be9..6b888d04392d 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -11,6 +11,7 @@ lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o lkdtm-$(CONFIG_LKDTM) += cfi.o lkdtm-$(CONFIG_LKDTM) += fortify.o +lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o KASAN_SANITIZE_rodata.o := n KASAN_SANITIZE_stackleak.o := n diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 3c0a67f072c0..b2aff4d87c01 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -179,6 +179,9 @@ static const struct crashtype crashtypes[] = { #ifdef CONFIG_X86_32 CRASHTYPE(DOUBLE_FAULT), #endif +#ifdef CONFIG_PPC_BOOK3S_64 + CRASHTYPE(PPC_SLB_MULTIHIT), +#endif }; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 6aa6d6a1a839..5ae48c64df24 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -107,4 +107,7 @@ void lkdtm_CFI_FORWARD_PROTO(void); /* fortify.c */ void lkdtm_FORTIFIED_STRSCPY(void); +/* powerpc.c */ +void lkdtm_PPC_SLB_MULTIHIT(void); + #endif diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c new file mode 100644 index 000000000000..077c9f9ed8d0 --- /dev/null +++ b/drivers/misc/lkdtm/powerpc.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "lkdtm.h" +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/mmu.h> + +/* Inserts new slb entries */ +static void insert_slb_entry(unsigned long p, int ssize, int page_size) +{ + unsigned long flags; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[page_size].sllp; + preempt_disable(); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED)) + : "memory"); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED + 1)) + : "memory"); + preempt_enable(); +} + +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */ +static int inject_vmalloc_slb_multihit(void) +{ + char *p; + + p = vmalloc(PAGE_SIZE); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_vmalloc_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + vfree(p); + return 0; +} + +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */ +static int inject_kmalloc_slb_multihit(void) +{ + char *p; + + p = kmalloc(2048, GFP_KERNEL); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_linear_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + kfree(p); + return 0; +} + +/* + * Few initial SLB entries are bolted. Add a test to inject + * multihit in bolted entry 0. 
+ */ +static void insert_dup_slb_entry_0(void) +{ + unsigned long test_address = PAGE_OFFSET, *test_ptr; + unsigned long esid, vsid; + unsigned long i = 0; + + test_ptr = (unsigned long *)test_address; + preempt_disable(); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | SLB_NUM_BOLTED) + : "memory"); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | (SLB_NUM_BOLTED + 1)) + : "memory"); + + pr_info("%s accessing test address 0x%lx: 0x%lx\n", + __func__, test_address, *test_ptr); + + preempt_enable(); +} + +void lkdtm_PPC_SLB_MULTIHIT(void) +{ + if (!radix_enabled()) { + pr_info("Injecting SLB multihit errors\n"); + /* + * These need not be separate tests, And they do pretty + * much same thing. In any case we must recover from the + * errors introduced by these functions, machine would not + * survive these tests in case of failure to handle. + */ + inject_vmalloc_slb_multihit(); + inject_kmalloc_slb_multihit(); + insert_dup_slb_entry_0(); + pr_info("Recovered from SLB multihit errors\n"); + } else { + pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n"); + } +} diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index c21f65a5c762..9eb0d93b01c6 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -70,6 +70,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; unsigned long pidr = 0; + struct pci_dev *dev; // Locks both status & tidr mutex_lock(&ctx->status_mutex); @@ -81,8 +82,9 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) if (mm) pidr = mm->context.id; + dev = to_pci_dev(ctx->afu->fn->dev.parent); rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, - amr, mm, xsl_fault_error, ctx); + amr, pci_dev_id(dev), mm, xsl_fault_error, ctx); if (rc) goto out; diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index fd73d3bc0eb6..ab039c115381 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -2,8 +2,10 @@ // Copyright 2017 IBM Corp. 
#include <linux/sched/mm.h> #include <linux/mutex.h> +#include <linux/mm.h> #include <linux/mm_types.h> #include <linux/mmu_context.h> +#include <linux/mmu_notifier.h> #include <asm/copro.h> #include <asm/pnv-ocxl.h> #include <asm/xive.h> @@ -33,6 +35,7 @@ #define SPA_PE_VALID 0x80000000 +struct ocxl_link; struct pe_data { struct mm_struct *mm; @@ -41,6 +44,8 @@ struct pe_data { /* opaque pointer to be passed to the above callback */ void *xsl_err_data; struct rcu_head rcu; + struct ocxl_link *link; + struct mmu_notifier mmu_notifier; }; struct spa { @@ -83,6 +88,8 @@ struct ocxl_link { int domain; int bus; int dev; + void __iomem *arva; /* ATSD register virtual address */ + spinlock_t atsd_lock; /* to serialize shootdowns */ atomic_t irq_available; struct spa *spa; void *platform_data; @@ -388,6 +395,7 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l link->bus = dev->bus->number; link->dev = PCI_SLOT(dev->devfn); atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + spin_lock_init(&link->atsd_lock); rc = alloc_spa(dev, link); if (rc) @@ -403,6 +411,13 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l if (rc) goto err_xsl_irq; + /* if link->arva is not defeined, MMIO registers are not used to + * generate TLB invalidate. PowerBus snooping is enabled. + * Otherwise, PowerBus snooping is disabled. TLB Invalidates are + * initiated using MMIO registers. + */ + pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva); + *out_link = link; return 0; @@ -454,6 +469,11 @@ static void release_xsl(struct kref *ref) { struct ocxl_link *link = container_of(ref, struct ocxl_link, ref); + if (link->arva) { + pnv_ocxl_unmap_lpar(link->arva); + link->arva = NULL; + } + list_del(&link->list); /* call platform code before releasing data */ pnv_ocxl_spa_release(link->platform_data); @@ -470,6 +490,27 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle) } EXPORT_SYMBOL_GPL(ocxl_link_release); +static void invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier); + struct ocxl_link *link = pe_data->link; + unsigned long addr, pid, page_size = PAGE_SIZE; + + pid = mm->context.id; + trace_ocxl_mmu_notifier_range(start, end, pid); + + spin_lock(&link->atsd_lock); + for (addr = start; addr < end; addr += page_size) + pnv_ocxl_tlb_invalidate(link->arva, pid, addr, page_size); + spin_unlock(&link->atsd_lock); +} + +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = { + .invalidate_range = invalidate_range, +}; + static u64 calculate_cfg_state(bool kernel) { u64 state; @@ -494,7 +535,7 @@ static u64 calculate_cfg_state(bool kernel) } int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u16 bdf, struct mm_struct *mm, void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), void *xsl_err_data) { @@ -526,9 +567,13 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, pe_data->mm = mm; pe_data->xsl_err_cb = xsl_err_cb; pe_data->xsl_err_data = xsl_err_data; + pe_data->link = link; + pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops; memset(pe, 0, sizeof(struct ocxl_process_element)); pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->pasid = cpu_to_be32(pasid << (31 - 19)); + pe->bdf = cpu_to_be16(bdf); pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); pe->pid = cpu_to_be32(pidr); pe->tid = cpu_to_be32(tidr); @@ 
-540,8 +585,17 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, * by the nest MMU. If we have a kernel context, TLBIs are * already global. */ - if (mm) + if (mm) { mm_context_add_copro(mm); + if (link->arva) { + /* Use MMIO registers for the TLB Invalidate + * operations. + */ + trace_ocxl_init_mmu_notifier(pasid, mm->context.id); + mmu_notifier_register(&pe_data->mmu_notifier, mm); + } + } + /* * Barrier is to make sure PE is visible in the SPA before it * is used by the device. It also helps with the global TLBI @@ -672,6 +726,18 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) WARN(1, "Couldn't find pe data when removing PE\n"); } else { if (pe_data->mm) { + if (link->arva) { + trace_ocxl_release_mmu_notifier(pasid, + pe_data->mm->context.id); + mmu_notifier_unregister(&pe_data->mmu_notifier, + pe_data->mm); + spin_lock(&link->atsd_lock); + pnv_ocxl_tlb_invalidate(link->arva, + pe_data->mm->context.id, + 0ull, + PAGE_SIZE); + spin_unlock(&link->atsd_lock); + } mm_context_remove_copro(pe_data->mm); mmdrop(pe_data->mm); } diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 0bad0a123af6..10125a22d5a5 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -84,13 +84,16 @@ struct ocxl_context { struct ocxl_process_element { __be64 config_state; - __be32 reserved1[11]; + __be32 pasid; + __be16 bdf; + __be16 reserved1; + __be32 reserved2[9]; __be32 lpid; __be32 tid; __be32 pid; - __be32 reserved2[10]; + __be32 reserved3[10]; __be64 amr; - __be32 reserved3[3]; + __be32 reserved4[3]; __be32 software_state; }; diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h index 17e21cb2addd..a33a5094ff6c 100644 --- a/drivers/misc/ocxl/trace.h +++ b/drivers/misc/ocxl/trace.h @@ -8,6 +8,70 @@ #include <linux/tracepoint.h> + +TRACE_EVENT(ocxl_mmu_notifier_range, + TP_PROTO(unsigned long start, unsigned long end, unsigned long pidr), + TP_ARGS(start, end, pidr), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->pidr = pidr; + ), + + TP_printk("start=0x%lx end=0x%lx pidr=0x%lx", + __entry->start, + __entry->end, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_init_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_release_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + DECLARE_EVENT_CLASS(ocxl_context, TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), TP_ARGS(pid, spa, pasid, pidr, tidr), diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index d9a5722f561b..3d1fc8d2ca66 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ fail_open: * ps3_gelic_driver_remove - remove a device from the control of this driver */ -static int ps3_gelic_driver_remove(struct 
ps3_system_bus_device *dev) +static void ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) { struct gelic_card *card = ps3_system_bus_get_drvdata(dev); struct net_device *netdev0; @@ -1840,7 +1840,6 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) ps3_close_hv_device(dev); pr_debug("%s: done\n", __func__); - return 0; } static struct ps3_system_bus_driver ps3_gelic_driver = { diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index e54aa2d82f50..65512b6cc6fd 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -1196,7 +1196,7 @@ static int ps3_lpm_probe(struct ps3_system_bus_device *dev) return 0; } -static int ps3_lpm_remove(struct ps3_system_bus_device *dev) +static void ps3_lpm_remove(struct ps3_system_bus_device *dev) { dev_dbg(&dev->core, " -> %s:%u:\n", __func__, __LINE__); @@ -1206,7 +1206,6 @@ static int ps3_lpm_remove(struct ps3_system_bus_device *dev) lpm_priv = NULL; dev_info(&dev->core, " <- %s:%u:\n", __func__, __LINE__); - return 0; } static struct ps3_system_bus_driver ps3_lpm_driver = { diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 4ed131eaff51..e34ae6a442c7 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -1102,7 +1102,7 @@ static int ps3_vuart_cleanup(struct ps3_system_bus_device *dev) * device can no longer be used. */ -static int ps3_vuart_remove(struct ps3_system_bus_device *dev) +static void ps3_vuart_remove(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_priv *priv = to_port_priv(dev); struct ps3_vuart_port_driver *drv; @@ -1118,7 +1118,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1141,7 +1141,6 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } /** @@ -1154,7 +1153,7 @@ static int ps3_vuart_remove(struct ps3_system_bus_device *dev) * sequence. 
*/ -static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) +static void ps3_vuart_shutdown(struct ps3_system_bus_device *dev) { struct ps3_vuart_port_driver *drv; @@ -1169,7 +1168,7 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; + return; } drv = ps3_system_bus_dev_to_vuart_drv(dev); @@ -1193,7 +1192,6 @@ static int ps3_vuart_shutdown(struct ps3_system_bus_device *dev) dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); mutex_unlock(&vuart_bus_priv.probe_mutex); - return 0; } static int __init ps3_vuart_bus_init(void) diff --git a/drivers/ps3/ps3stor_lib.c b/drivers/ps3/ps3stor_lib.c index 333ba83006e4..a12a1ad9b5fe 100644 --- a/drivers/ps3/ps3stor_lib.c +++ b/drivers/ps3/ps3stor_lib.c @@ -189,7 +189,7 @@ int ps3stor_setup(struct ps3_storage_device *dev, irq_handler_t handler) dev->bounce_lpar = ps3_mm_phys_to_lpar(__pa(dev->bounce_buf)); dev->bounce_dma = dma_map_single(&dev->sbd.core, dev->bounce_buf, dev->bounce_size, DMA_BIDIRECTIONAL); - if (!dev->bounce_dma) { + if (dma_mapping_error(&dev->sbd.core, dev->bounce_dma)) { dev_err(&dev->sbd.core, "%s:%u: map DMA region failed\n", __func__, __LINE__); error = -ENODEV; diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index e4e0d767b98e..244fc27215dc 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -329,6 +329,7 @@ static int start_context(struct ocxlflash_context *ctx) struct ocxl_hw_afu *afu = ctx->hw_afu; struct ocxl_afu_config *acfg = &afu->acfg; void *link_token = afu->link_token; + struct pci_dev *pdev = afu->pdev; struct device *dev = afu->dev; bool master = ctx->master; struct mm_struct *mm; @@ -360,8 +361,9 @@ static int start_context(struct ocxlflash_context *ctx) mm = current->mm; } - rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, mm, - ocxlflash_xsl_fault, ctx); + rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, + pci_dev_id(pdev), mm, ocxlflash_xsl_fault, + ctx); if (unlikely(rc)) { dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n", __func__, rc); diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index f75c0b5cd587..ccb5771f1cb7 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -402,7 +402,7 @@ fail_free_bounce: return error; } -static int ps3rom_remove(struct ps3_system_bus_device *_dev) +static void ps3rom_remove(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); struct Scsi_Host *host = ps3_system_bus_get_drvdata(&dev->sbd); @@ -412,7 +412,6 @@ static int ps3rom_remove(struct ps3_system_bus_device *_dev) scsi_host_put(host); ps3_system_bus_set_drvdata(&dev->sbd, NULL); kfree(dev->bounce_buf); - return 0; } static struct ps3_system_bus_driver ps3rom = { diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index fb52133c3557..98568b046a1a 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -200,7 +200,7 @@ fail_start: return result; } -static int ps3_ehci_remove(struct ps3_system_bus_device *dev) +static void ps3_ehci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -227,8 +227,6 @@ static int ps3_ehci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ehci_driver_register(struct ps3_system_bus_driver *drv) 
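The ps3stor_lib.c hunk above replaces the test of the returned handle against zero with dma_mapping_error(): a dma_addr_t of 0 can be a perfectly valid mapping, so failures have to be detected through the DMA API's error cookie. A minimal sketch of the pattern; the helper name is hypothetical and only mirrors what ps3stor_setup() does with its bounce buffer:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/*
 * Sketch only: map_bounce_buffer() is not part of this series.  It maps a
 * bounce buffer and checks the result with dma_mapping_error() instead of
 * comparing the dma_addr_t against 0.
 */
static int map_bounce_buffer(struct device *dev, void *buf, size_t size,
			     dma_addr_t *dma_out)
{
	dma_addr_t dma = dma_map_single(dev, buf, size, DMA_BIDIRECTIONAL);

	if (dma_mapping_error(dev, dma)) {
		dev_err(dev, "%s: map DMA region failed\n", __func__);
		return -ENODEV;
	}

	*dma_out = dma;
	return 0;
}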
diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index f77cd6af0ccf..4f5af929c3e4 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -184,7 +184,7 @@ fail_start: return result; } -static int ps3_ohci_remove(struct ps3_system_bus_device *dev) +static void ps3_ohci_remove(struct ps3_system_bus_device *dev) { unsigned int tmp; struct usb_hcd *hcd = ps3_system_bus_get_drvdata(dev); @@ -212,8 +212,6 @@ static int ps3_ohci_remove(struct ps3_system_bus_device *dev) ps3_dma_region_free(dev->d_region); ps3_close_hv_device(dev); - - return 0; } static int __init ps3_ohci_driver_register(struct ps3_system_bus_driver *drv) diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c index 203c254f8f6c..2fe08b67eda7 100644 --- a/drivers/video/fbdev/ps3fb.c +++ b/drivers/video/fbdev/ps3fb.c @@ -1208,7 +1208,7 @@ err: return retval; } -static int ps3fb_shutdown(struct ps3_system_bus_device *dev) +static void ps3fb_shutdown(struct ps3_system_bus_device *dev) { struct fb_info *info = ps3_system_bus_get_drvdata(dev); u64 xdr_lpar = ps3_mm_phys_to_lpar(__pa(ps3fb_videomemory.address)); @@ -1241,8 +1241,6 @@ static int ps3fb_shutdown(struct ps3_system_bus_device *dev) lv1_gpu_memory_free(ps3fb.memory_handle); ps3_close_hv_device(dev); dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); - - return 0; } static struct ps3_system_bus_driver ps3fb_driver = { diff --git a/include/linux/compat.h b/include/linux/compat.h index 14d514233e1d..400c0941c8af 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -442,6 +442,38 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } +#ifdef CONFIG_CPU_BIG_ENDIAN +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_put_user(__s->sig[3] >> 32, &__c->sig[7], label); \ + unsafe_put_user(__s->sig[3], &__c->sig[6], label); \ + fallthrough; \ + case 3: \ + unsafe_put_user(__s->sig[2] >> 32, &__c->sig[5], label); \ + unsafe_put_user(__s->sig[2], &__c->sig[4], label); \ + fallthrough; \ + case 2: \ + unsafe_put_user(__s->sig[1] >> 32, &__c->sig[3], label); \ + unsafe_put_user(__s->sig[1], &__c->sig[2], label); \ + fallthrough; \ + case 1: \ + unsafe_put_user(__s->sig[0] >> 32, &__c->sig[1], label); \ + unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ + } \ +} while (0) +#else +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ +} while (0) +#endif + extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 551093b74596..15acce5ab106 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -361,6 +361,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages); +extern int arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params); +void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/regset.h b/include/linux/regset.h index c3403f328257..a00765f0e8cf 100644 --- 
a/include/linux/regset.h +++ b/include/linux/regset.h @@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size) return s->left; } +static inline struct membuf membuf_at(const struct membuf *s, size_t offs) +{ + struct membuf n = *s; + + if (offs > n.left) + offs = n.left; + n.p += offs; + n.left -= offs; + + return n; +} + /* current s->p must be aligned for v; v must be a scalar */ #define membuf_store(s, v) \ ({ \ diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index e013736e275d..3ed736da02c8 100644 --- a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -447,7 +447,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle); * defined */ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, + u64 amr, u16 bdf, struct mm_struct *mm, void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), void *xsl_err_data); diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 966b709ee286..631a61ce52f4 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -1044,7 +1044,7 @@ clean_open: }; /* snd_ps3_probe */ /* called when module removal */ -static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) +static void snd_ps3_driver_remove(struct ps3_system_bus_device *dev) { int ret; pr_info("%s:start id=%d\n", __func__, dev->match_id); @@ -1070,7 +1070,6 @@ static int snd_ps3_driver_remove(struct ps3_system_bus_device *dev) lv1_gpu_device_unmap(2); ps3_close_hv_device(dev); pr_info("%s:end id=%d\n", __func__, dev->match_id); - return 0; } /* snd_ps3_remove */ static struct ps3_system_bus_driver snd_ps3_bus_driver_info = { diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 92ba4cc41314..11ef159be0fd 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -69,3 +69,4 @@ USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO FORTIFIED_STRSCPY +PPC_SLB_MULTIHIT Recovered diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile index b397babd569b..ae963eb2dc5b 100644 --- a/tools/testing/selftests/powerpc/eeh/Makefile +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -3,7 +3,7 @@ noarg: $(MAKE) -C ../ TEST_PROGS := eeh-basic.sh -TEST_FILES := eeh-functions.sh +TEST_FILES := eeh-functions.sh settings top_srcdir = ../../../../.. 
include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index fd747b2ffcfc..65d2148b05dc 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -38,7 +38,7 @@ static void segv_handler(int n, siginfo_t *info, void *ctxt_v) int bad_access(char *p, bool write) { - char x; + char x = 0; fault_code = 0; fault_addr = 0; diff --git a/tools/testing/selftests/powerpc/nx-gzip/.gitignore b/tools/testing/selftests/powerpc/nx-gzip/.gitignore new file mode 100644 index 000000000000..886d522d52df --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +gunz_test +gzfht_test diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 4257a1f156bb..93614b125ded 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only rfi_flush entry_flush +spectre_v2 diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index 405b5364044c..ce3375cd8e73 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -3,3 +3,4 @@ signal signal_tm sigfuz sigreturn_vdso +sig_sc_double_restart diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore index b00cab225476..a1e19ccdef84 100644 --- a/tools/testing/selftests/powerpc/syscalls/.gitignore +++ b/tools/testing/selftests/powerpc/syscalls/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only ipc_unmuxed +rtas_filter |
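For context on the unsafe_put_compat_sigset() helper added to include/linux/compat.h above: like the other unsafe_*() accessors, it must run inside an open user-access window, and it jumps to the supplied label on a fault. A hedged sketch of a caller; the function name and buffer are illustrative, not taken from this series:

#include <linux/compat.h>
#include <linux/uaccess.h>

/*
 * Illustrative caller, not from this series: write a sigset to a compat
 * user buffer inside a single user-access window, using the fault label
 * that unsafe_put_compat_sigset() expects.
 */
static int put_compat_mask(compat_sigset_t __user *umask, const sigset_t *set)
{
	if (!user_write_access_begin(umask, sizeof(*umask)))
		return -EFAULT;

	unsafe_put_compat_sigset(umask, set, Efault);
	user_write_access_end();
	return 0;

Efault:
	user_write_access_end();
	return -EFAULT;
}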
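Similarly, the membuf_at() helper added to include/linux/regset.h above returns a copy of the membuf advanced by a fixed offset (clamped to the space left), so a regset ->regset_get() implementation can fill a member at a known position without moving its main cursor. A small, hypothetical sketch; the function, offset and value are illustrative only:

#include <linux/regset.h>

/*
 * Hypothetical fragment of a ->regset_get() implementation: store one
 * 64-bit register at byte offset 'offs' of the output buffer.  Writes
 * through the sub-view do not advance the caller's 'to' cursor, because
 * membuf_at() returns a copy.
 */
static void put_reg_at(struct membuf to, size_t offs, u64 val)
{
	struct membuf slot = membuf_at(&to, offs);

	if (slot.left >= sizeof(val))
		membuf_store(&slot, val);
}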