-rw-r--r--  Documentation/powerpc/firmware-assisted-dump.txt | 4
-rw-r--r--  MAINTAINERS | 13
-rw-r--r--  arch/powerpc/Kconfig | 25
-rw-r--r--  arch/powerpc/Makefile | 11
-rw-r--r--  arch/powerpc/Makefile.postlink | 17
-rw-r--r--  arch/powerpc/boot/Makefile | 5
-rw-r--r--  arch/powerpc/boot/crtsavres.S | 8
-rw-r--r--  arch/powerpc/boot/dts/ac14xx.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/digsy_mtc.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/b4qds.dtsi | 8
-rw-r--r--  arch/powerpc/boot/dts/fsl/c293pcie.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1023rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p2041rdb.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/p3041ds.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/p4080ds.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/p5020ds.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/p5040ds.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/t208xqds.dtsi | 8
-rw-r--r--  arch/powerpc/boot/dts/fsl/t4240qds.dts | 12
-rw-r--r--  arch/powerpc/boot/dts/fsl/t4240rdb.dts | 6
-rw-r--r--  arch/powerpc/boot/dts/fsp2.dts | 608
-rw-r--r--  arch/powerpc/boot/dts/mpc5121ads.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/mpc8308_p1m.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mpc8349emitx.dts | 4
-rw-r--r--  arch/powerpc/boot/dts/mpc8377_rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mpc8377_wlan.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mpc8378_rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mpc8379_rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/pcm030.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/pcm032.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/pdm360ng.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/sequoia.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/warp.dts | 2
-rw-r--r--  arch/powerpc/boot/ppc_asm.h | 12
-rw-r--r--  arch/powerpc/configs/44x/fsp2_defconfig | 126
-rw-r--r--  arch/powerpc/include/asm/barrier.h | 5
-rw-r--r--  arch/powerpc/include/asm/bitops.h | 87
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgalloc.h | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 45
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 6
-rw-r--r--  arch/powerpc/include/asm/code-patching.h | 10
-rw-r--r--  arch/powerpc/include/asm/dbell.h | 13
-rw-r--r--  arch/powerpc/include/asm/delay.h | 16
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 49
-rw-r--r--  arch/powerpc/include/asm/fadump.h | 4
-rw-r--r--  arch/powerpc/include/asm/head-64.h | 25
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 4
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 1
-rw-r--r--  arch/powerpc/include/asm/mce.h | 15
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgalloc.h | 3
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgalloc.h | 11
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 76
-rw-r--r--  arch/powerpc/include/asm/paca.h | 14
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h | 14
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 11
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 11
-rw-r--r--  arch/powerpc/include/asm/processor.h | 30
-rw-r--r--  arch/powerpc/include/asm/trace.h | 33
-rw-r--r--  arch/powerpc/include/uapi/asm/Kbuild | 6
-rw-r--r--  arch/powerpc/include/uapi/asm/param.h | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/poll.h | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/resource.h | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/sockios.h | 20
-rw-r--r--  arch/powerpc/include/uapi/asm/statfs.h | 6
-rw-r--r--  arch/powerpc/kernel/Makefile | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 10
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 193
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 284
-rw-r--r--  arch/powerpc/kernel/fadump.c | 196
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 188
-rw-r--r--  arch/powerpc/kernel/irq.c | 62
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 8
-rw-r--r--  arch/powerpc/kernel/mce.c | 1
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 3
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 6
-rw-r--r--  arch/powerpc/kernel/optprobes.c | 53
-rw-r--r--  arch/powerpc/kernel/process.c | 45
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 4
-rw-r--r--  arch/powerpc/kernel/smp.c | 7
-rw-r--r--  arch/powerpc/kernel/time.c | 96
-rw-r--r--  arch/powerpc/kernel/tm.S | 4
-rw-r--r--  arch/powerpc/kernel/traps.c | 3
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 61
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12
-rw-r--r--  arch/powerpc/lib/Makefile | 15
-rw-r--r--  arch/powerpc/lib/code-patching.c | 171
-rw-r--r--  arch/powerpc/lib/copyuser_power7.S | 4
-rw-r--r--  arch/powerpc/lib/crtsavres.S | 6
-rw-r--r--  arch/powerpc/lib/xor_vmx.c | 53
-rw-r--r--  arch/powerpc/lib/xor_vmx.h | 20
-rw-r--r--  arch/powerpc/lib/xor_vmx_glue.c | 62
-rw-r--r--  arch/powerpc/mm/8xx_mmu.c | 2
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c | 2
-rw-r--r--  arch/powerpc/mm/dump_hashpagetable.c | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 17
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 41
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 2
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 5
-rw-r--r--  arch/powerpc/mm/init_64.c | 82
-rw-r--r--  arch/powerpc/mm/mem.c | 20
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c | 15
-rw-r--r--  arch/powerpc/mm/mmu_decl.h | 1
-rw-r--r--  arch/powerpc/mm/pgtable-book3s64.c | 4
-rw-r--r--  arch/powerpc/mm/pgtable-hash64.c | 115
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 90
-rw-r--r--  arch/powerpc/mm/pgtable_32.c | 15
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 45
-rw-r--r--  arch/powerpc/mm/slb.c | 10
-rw-r--r--  arch/powerpc/mm/slb_low.S | 30
-rw-r--r--  arch/powerpc/mm/tlb-radix.c | 9
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c | 6
-rw-r--r--  arch/powerpc/perf/hv-24x7.c | 242
-rw-r--r--  arch/powerpc/perf/hv-24x7.h | 69
-rw-r--r--  arch/powerpc/perf/power9-events-list.h | 4
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 8
-rw-r--r--  arch/powerpc/platforms/44x/Kconfig | 12
-rw-r--r--  arch/powerpc/platforms/44x/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/44x/fsp2.c | 62
-rw-r--r--  arch/powerpc/platforms/cell/smp.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 16
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 198
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 6
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 133
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 160
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 13
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 34
-rw-r--r--  arch/powerpc/platforms/powernv/subcore.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 7
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c | 3
-rw-r--r--  arch/powerpc/sysdev/mpc8xx_pic.c | 2
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 2
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 4
-rw-r--r--  arch/powerpc/tools/head_check.sh | 78
-rwxr-xr-x  arch/powerpc/tools/unrel_branch_check.sh | 57
-rw-r--r--  arch/powerpc/xmon/xmon.c | 15
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  drivers/cpuidle/cpuidle-powernv.c | 53
-rw-r--r--  drivers/cpuidle/cpuidle-pseries.c | 22
-rw-r--r--  drivers/misc/cxl/Kconfig | 5
-rw-r--r--  drivers/misc/cxl/Makefile | 2
-rw-r--r--  drivers/misc/cxl/cxl.h | 6
-rw-r--r--  drivers/misc/cxl/cxllib.c | 246
-rw-r--r--  drivers/misc/cxl/fault.c | 29
-rw-r--r--  drivers/misc/cxl/flash.c | 8
-rw-r--r--  drivers/misc/cxl/native.c | 16
-rw-r--r--  drivers/misc/cxl/pci.c | 41
-rw-r--r--  drivers/watchdog/Kconfig | 2
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 12
-rw-r--r--  include/linux/processor.h | 70
-rw-r--r--  include/misc/cxllib.h | 133
-rw-r--r--  mm/Kconfig | 6
-rw-r--r--  tools/testing/selftests/powerpc/benchmarks/context_switch.c | 53
161 files changed, 4328 insertions(+), 1082 deletions(-)
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index 9cabaf8a207e..bdd344aa18d9 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -61,8 +61,8 @@ as follows:
boot successfully. For syntax of crashkernel= parameter,
refer to Documentation/kdump/kdump.txt. If any offset is
provided in crashkernel= parameter, it will be ignored
- as fadump reserves memory at end of RAM for boot memory
- dump preservation in case of a crash.
+ as fadump uses a predefined offset to reserve memory
+ for boot memory dump preservation in case of a crash.
-- After the low memory (boot memory) area has been saved, the
firmware will reset PCI and other hardware state. It will
diff --git a/MAINTAINERS b/MAINTAINERS
index a9795896323e..a4f37b69a66c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3781,8 +3781,8 @@ S: Supported
F: drivers/net/ethernet/chelsio/cxgb4vf/
CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
-M: Ian Munsie <imunsie@au1.ibm.com>
M: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+M: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
L: linuxppc-dev@lists.ozlabs.org
S: Supported
F: arch/powerpc/platforms/powernv/pci-cxl.c
@@ -5352,7 +5352,7 @@ S: Maintained
F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER
-M: Li Yang <leoli@freescale.com>
+M: Li Yang <leoyang.li@nxp.com>
M: Zhang Wei <zw@zh-kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
@@ -5417,11 +5417,11 @@ S: Maintained
F: drivers/net/ethernet/freescale/dpaa
FREESCALE SOC DRIVERS
-M: Scott Wood <oss@buserror.net>
+M: Li Yang <leoyang.li@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
L: linux-arm-kernel@lists.infradead.org
S: Maintained
-F: Documentation/devicetree/bindings/powerpc/fsl/
+F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
F: include/linux/fsl/
@@ -5434,14 +5434,14 @@ F: include/soc/fsl/*qe*.h
F: include/soc/fsl/*ucc*.h
FREESCALE USB PERIPHERAL DRIVERS
-M: Li Yang <leoli@freescale.com>
+M: Li Yang <leoyang.li@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/usb/gadget/udc/fsl*
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M: Li Yang <leoli@freescale.com>
+M: Li Yang <leoyang.li@nxp.com>
L: netdev@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
@@ -7784,6 +7784,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git
S: Maintained
F: arch/powerpc/platforms/83xx/
F: arch/powerpc/platforms/85xx/
+F: Documentation/devicetree/bindings/powerpc/fsl/
LINUX FOR POWERPC PA SEMI PWRFICIENT
L: linuxppc-dev@lists.ozlabs.org
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6189238e69f8..afb608413314 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -109,14 +109,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
-config ARCH_HAS_ILOG2_U32
- bool
- default y
-
-config ARCH_HAS_ILOG2_U64
- bool
- default y if 64BIT
-
config GENERIC_HWEIGHT
bool
default y
@@ -138,6 +130,7 @@ config PPC
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -163,7 +156,7 @@ config PPC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
- select GENERIC_TIME_VSYSCALL_OLD
+ select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB
@@ -171,6 +164,8 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION)
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
@@ -208,6 +203,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_IRQ_TIME_ACCOUNTING
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
@@ -438,6 +434,17 @@ config PPC_TRANSACTIONAL_MEM
---help---
Support user-mode Transactional Memory on POWERPC.
+config LD_HEAD_STUB_CATCH
+ bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
+ depends on PPC64
+ default n
+ help
+ Very large kernels can cause linker branch stubs to be generated by
+ code in head_64.S, which moves the head text sections out of their
+ specified location. This option can work around the problem.
+
+ If unsure, say "N".
+
config DISABLE_MPROFILE_KERNEL
bool "Disable use of mprofile-kernel for kernel tracing"
depends on PPC64 && CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 3e0f0e1fadef..8d4ed73d5490 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -98,6 +98,7 @@ endif
LDFLAGS_vmlinux-y := -Bstatic
LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
+LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
ifeq ($(CONFIG_PPC64),y)
ifeq ($(call cc-option-yn,-mcmodel=medium),y)
@@ -189,7 +190,17 @@ else
CHECKFLAGS += -D__LITTLE_ENDIAN__
endif
+ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+else
+ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
+# Have the linker provide sfpr if possible.
+# There is a corresponding test in arch/powerpc/lib/Makefile
+KBUILD_LDFLAGS_MODULE += --save-restore-funcs
+else
+KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+endif
+endif
ifeq ($(CONFIG_476FPE_ERR46),y)
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index eccfcc88afae..5db43ebbe2df 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -10,13 +10,26 @@ __archpost:
-include include/config/auto.conf
include scripts/Kbuild.include
+quiet_cmd_head_check = CHKHEAD $@
+ cmd_head_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/head_check.sh "$(NM)" "$@"
+
quiet_cmd_relocs_check = CHKREL $@
- cmd_relocs_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
+ifdef CONFIG_PPC_BOOK3S_64
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+else
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
+endif
# `@true` prevents complaint when there is nothing to be done
vmlinux: FORCE
@true
+ifdef CONFIG_PPC64
+ $(call cmd,head_check)
+endif
ifdef CONFIG_RELOCATABLE
$(call if_changed,relocs_check)
endif
@@ -25,7 +38,7 @@ endif
@true
clean:
- @true
+ rm -f .tmp_symbols.txt
PHONY += FORCE clean
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e82f333cc84a..a7814a7b1523 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -95,13 +95,16 @@ libfdtheader := fdt.h libfdt.h libfdt_internal.h
$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
$(addprefix $(obj)/,$(libfdtheader))
-src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \
+src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
uartlite.c mpc52xx-psc.c opal.c
src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
+ifndef CONFIG_PPC64_BOOT_WRAPPER
+src-wlib-y += crtsavres.S
+endif
src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
diff --git a/arch/powerpc/boot/crtsavres.S b/arch/powerpc/boot/crtsavres.S
index f3d9b35c07d4..085fb2b9a8b8 100644
--- a/arch/powerpc/boot/crtsavres.S
+++ b/arch/powerpc/boot/crtsavres.S
@@ -37,12 +37,13 @@
* the executable file might be covered by the GNU General Public License.
*/
+#ifdef __powerpc64__
+#error "On PPC64, FPR save/restore functions are provided by the linker."
+#endif
+
.file "crtsavres.S"
.section ".text"
-/* On PowerPC64 Linux, these functions are provided by the linker. */
-#ifndef __powerpc64__
-
#define _GLOBAL(name) \
.type name,@function; \
.globl name; \
@@ -230,4 +231,3 @@ _GLOBAL(_rest32gpr_31_x)
mtlr 0
mr 1,11
blr
-#endif
diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts
index 27fcabc2f857..83bcfd865167 100644
--- a/arch/powerpc/boot/dts/ac14xx.dts
+++ b/arch/powerpc/boot/dts/ac14xx.dts
@@ -10,7 +10,7 @@
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "ac14xx";
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index 955bff629df3..c280e75c86bf 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -73,7 +73,7 @@
i2c@3d00 {
eeprom@50 {
- compatible = "at,24c08";
+ compatible = "atmel,24c08";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 3785ef826d07..999efd3bc167 100644
--- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -166,19 +166,19 @@
reg = <0>;
eeprom@50 {
- compatible = "at24,24c64";
+ compatible = "atmel,24c64";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@53 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x53>;
};
eeprom@57 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x57>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts
index 66709788429d..5e905e0857cf 100644
--- a/arch/powerpc/boot/dts/fsl/c293pcie.dts
+++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts
@@ -153,7 +153,7 @@
&soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c1024";
+ compatible = "st,24c1024", "atmel,24c1024";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
index a8e4ba070104..2ca9cee2ddeb 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -89,7 +89,7 @@
&board_soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p1023rdb.dts b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
index 9716ca64651c..ead928364beb 100644
--- a/arch/powerpc/boot/dts/fsl/p1023rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
@@ -79,7 +79,7 @@
i2c@3000 {
eeprom@53 {
- compatible = "at24,24c04";
+ compatible = "atmel,24c04";
reg = <0x53>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p2041rdb.dts b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
index e50fea95a853..950816b9d6e1 100644
--- a/arch/powerpc/boot/dts/fsl/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
@@ -127,7 +127,7 @@
reg = <0x48>;
};
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
rtc@68 {
@@ -142,7 +142,7 @@
i2c@118100 {
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
index 40748e415adb..6f5f7283c533 100644
--- a/arch/powerpc/boot/dts/fsl/p3041ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -124,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts
index 816b9788d5f6..65e20152e22f 100644
--- a/arch/powerpc/boot/dts/fsl/p4080ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts
@@ -125,11 +125,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/p5020ds.dts b/arch/powerpc/boot/dts/fsl/p5020ds.dts
index cd6f37386111..b24adf902d8d 100644
--- a/arch/powerpc/boot/dts/fsl/p5020ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5020ds.dts
@@ -124,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
index 45084738cf4e..30850b3228e0 100644
--- a/arch/powerpc/boot/dts/fsl/p5040ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -133,11 +133,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
index ec080bd01b09..db4139999b28 100644
--- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -147,17 +147,17 @@
reg = <0x0>;
eeprom@50 {
- compatible = "at24,24c512";
+ compatible = "atmel,24c512";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x51>;
};
eeprom@57 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x57>;
};
@@ -174,7 +174,7 @@
reg = <0x1>;
eeprom@55 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x55>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
index 9573ceada07c..c0913ac5aaad 100644
--- a/arch/powerpc/boot/dts/fsl/t4240qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -377,27 +377,27 @@
reg = <0>;
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
eeprom@53 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x53>;
};
eeprom@54 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x54>;
};
eeprom@55 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x55>;
};
eeprom@56 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x56>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
index 8166c660712a..15eb0a3f7290 100644
--- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
@@ -130,15 +130,15 @@
reg = <0x2f>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
eeprom@54 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x54>;
};
eeprom@56 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x56>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
new file mode 100644
index 000000000000..475953ada707
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -0,0 +1,608 @@
+/*
+ * Device Tree Source for FSP2
+ *
+ * Copyright 2010,2012 IBM Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+
+/dts-v1/;
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ model = "ibm,fsp2";
+ compatible = "ibm,fsp2";
+ dcr-parent = <&{/cpus/cpu@0}>;
+
+ aliases {
+ ethernet0 = &EMAC0;
+ ethernet1 = &EMAC1;
+ serial0 = &UART0;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ model = "PowerPC, 476FSP2";
+ reg = <0x0>;
+ clock-frequency = <0>; /* Filled in by cuboot */
+ timebase-frequency = <0>; /* Filled in by cuboot */
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ d-cache-size = <32768>;
+ i-cache-size = <32768>;
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by
+ cuboot */
+ };
+
+ clocks {
+ mmc_clk: mmc_clk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ clock-output-names = "mmc_clk";
+ };
+ };
+
+ UIC0: uic0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <0>;
+ dcr-reg = <0x2c0 0x8>;
+ };
+
+ /* "interrupts" field is <bit level bit level>
+ first pair is non-critical, second is critical */
+ UIC1_0: uic1_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <1>;
+ dcr-reg = <0x2c8 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <21 0x4 4 0x84>;
+ };
+
+ /* PSI and DMA */
+ UIC1_1: uic1_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <2>;
+ dcr-reg = <0x350 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <22 0x4 5 0x84>;
+ };
+
+ /* Ethernet and USB */
+ UIC1_2: uic1_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <3>;
+ dcr-reg = <0x358 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <23 0x4 6 0x84>;
+ };
+
+ /* PLB Errors */
+ UIC1_3: uic1_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <4>;
+ dcr-reg = <0x360 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <24 0x4 7 0x84>;
+ };
+
+ UIC1_4: uic1_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <5>;
+ dcr-reg = <0x368 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <25 0x4 8 0x84>;
+ };
+
+ UIC1_5: uic1_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <6>;
+ dcr-reg = <0x370 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <26 0x4 9 0x84>;
+ };
+
+ /* 2nd level UICs for FSI */
+ UIC2_0: uic2_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <7>;
+ dcr-reg = <0x2d0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <16 0x4 0 0x84>;
+ };
+
+ UIC2_1: uic2_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <8>;
+ dcr-reg = <0x2d8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <17 0x4 1 0x84>;
+ };
+
+ UIC2_2: uic2_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <9>;
+ dcr-reg = <0x2e0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <18 0x4 2 0x84>;
+ };
+
+ UIC2_3: uic2_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <10>;
+ dcr-reg = <0x2e8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <19 0x4 3 0x84>;
+ };
+
+ UIC2_4: uic2_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <11>;
+ dcr-reg = <0x2f0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <20 0x4 4 0x84>;
+ };
+
+ UIC2_5: uic2_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <12>;
+ dcr-reg = <0x2f8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <21 0x4 5 0x84>;
+ };
+
+ UIC2_6: uic2_6 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <13>;
+ dcr-reg = <0x300 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <22 0x4 6 0x84>;
+ };
+
+ UIC2_7: uic2_7 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <14>;
+ dcr-reg = <0x308 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <23 0x4 7 0x84>;
+ };
+
+ UIC2_8: uic2_8 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <15>;
+ dcr-reg = <0x310 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <24 0x4 8 0x84>;
+ };
+
+ UIC2_9: uic2_9 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <16>;
+ dcr-reg = <0x318 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <25 0x4 9 0x84>;
+ };
+
+ UIC2_10: uic2_10 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <17>;
+ dcr-reg = <0x320 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <26 0x4 10 0x84>;
+ };
+
+ UIC2_11: uic2_11 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <18>;
+ dcr-reg = <0x328 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <27 0x4 11 0x84>;
+ };
+
+ UIC2_12: uic2_12 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <19>;
+ dcr-reg = <0x330 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <28 0x4 12 0x84>;
+ };
+
+ UIC2_13: uic2_13 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <20>;
+ dcr-reg = <0x338 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <29 0x4 13 0x84>;
+ };
+
+ UIC2_14: uic2_14 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <21>;
+ dcr-reg = <0x340 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <30 0x4 14 0x84>;
+ };
+
+ UIC2_15: uic2_15 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <22>;
+ dcr-reg = <0x348 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <31 0x4 15 0x84>;
+ };
+
+ mmc0: sdhci@020c0000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ status = "disabled";
+ reg = <0x020c0000 0x20000>;
+ reg-names = "mmc";
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <21 0x4 22 0x4>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <>;
+ clock-names = "mmc";
+ clocks = <&mmc_clk>;
+ };
+
+ plb6 {
+ compatible = "ibm,plb6";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges;
+
+ MCW0: memory-controller-wrapper {
+ compatible = "ibm,cw-476fsp2";
+ dcr-reg = <0x11111800 0x40>;
+ };
+
+ MCIF0: memory-controller {
+ compatible = "ibm,sdram-476fsp2", "ibm,sdram-4xx-ddr3";
+ dcr-reg = <0x11120000 0x10000>;
+ mcer-device = <&MCW0>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84 /* ECC UE */
+ 11 0x84>; /* ECC CE */
+ };
+ };
+
+ plb4 {
+ compatible = "ibm,plb4";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x00000010 0x00000000 0x80000000
+ 0x80000000 0x00000010 0x80000000 0x80000000>;
+ clock-frequency = <333333334>;
+
+ plb6-system-hung-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <0 0x84>;
+ };
+
+ l2-error-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <20 0x84>;
+ };
+
+ plb6-plb4-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <1 0x84>;
+ };
+
+ plb4-ahb-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <20 0x84>;
+ };
+
+ opbd-error-irq {
+ compatible = "ibm,opbd-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <5 0x84>;
+ };
+
+ cmu-error-irq {
+ compatible = "ibm,cmu-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <28 0x84>;
+ };
+
+ conf-error-irq {
+ compatible = "ibm,conf-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <11 0x84>;
+ };
+
+ mc-ue-irq {
+ compatible = "ibm,mc-ue-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84>;
+ };
+
+ reset-warning-irq {
+ compatible = "ibm,reset-warning-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <17 0x84>;
+ };
+
+ MAL0: mcmal0 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x80 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL0>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 4 0x4
+ /*RXEOB*/ 1 &UIC1_2 3 0x4
+ /*SERR*/ 2 &UIC1_2 7 0x4
+ /*TXDE*/ 3 &UIC1_2 6 0x4
+ /*RXDE*/ 4 &UIC1_2 5 0x4>;
+ };
+
+ MAL1: mcmal1 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x100 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL1>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 12 0x4
+ /*RXEOB*/ 1 &UIC1_2 11 0x4
+ /*SERR*/ 2 &UIC1_2 15 0x4
+ /*TXDE*/ 3 &UIC1_2 14 0x4
+ /*RXDE*/ 4 &UIC1_2 13 0x4>;
+ };
+
+ opb {
+ compatible = "ibm,opb";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges; // pass-thru to parent bus
+ clock-frequency = <83333334>;
+
+ EMAC0: ethernet@b0000000 {
+ linux,network-index = <0>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <1 0x4 0 0x4>;
+ reg = <0xb0000000 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <1>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <0>;
+ };
+
+ EMAC1: ethernet@b0000100 {
+ linux,network-index = <1>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <9 0x4 8 0x4>;
+ reg = <0xb0000100 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL1>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <1>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <2>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <1>;
+ };
+
+ RGMII: rgmii@b0000600 {
+ compatible = "ibm,rgmii";
+ has-mdio;
+ reg = <0xb0000600 0x8>;
+ };
+
+ UART0: serial@b0020000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0xb0020000 0x8>;
+ virtual-reg = <0xb0020000>;
+ clock-frequency = <20833333>;
+ current-speed = <115200>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <31 0x4>;
+ };
+ };
+
+ OHCI1: ohci@02040000 {
+ compatible = "ohci-le";
+ reg = <0x02040000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <28 0x8 29 0x8>;
+ };
+
+ OHCI2: ohci@02080000 {
+ compatible = "ohci-le";
+ reg = <0x02080000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <30 0x8 31 0x8>;
+ };
+
+ EHCI: ehci@02000000 {
+ compatible = "usb-ehci";
+ reg = <0x02000000 0xa4>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <23 0x4>;
+ };
+
+ };
+
+ chosen {
+ linux,stdout-path = "/plb/opb/serial@b0020000";
+ bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
+ };
+};
diff --git a/arch/powerpc/boot/dts/mpc5121ads.dts b/arch/powerpc/boot/dts/mpc5121ads.dts
index 75888ce2c792..1e81a7e32d18 100644
--- a/arch/powerpc/boot/dts/mpc5121ads.dts
+++ b/arch/powerpc/boot/dts/mpc5121ads.dts
@@ -9,7 +9,7 @@
* option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "mpc5121ads";
@@ -94,7 +94,7 @@
};
eeprom@50 {
- compatible = "at,24c32";
+ compatible = "atmel,24c32";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 57f86cdf9f36..cab933b3957a 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -123,7 +123,7 @@
interrupt-parent = <&ipic>;
dfsrr;
fram@50 {
- compatible = "ramtron,24c64";
+ compatible = "ramtron,24c64", "atmel,24c64";
reg = <0x50>;
};
};
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts
index 90aed3ac2f69..648a85858eb5 100644
--- a/arch/powerpc/boot/dts/mpc8349emitx.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -92,7 +92,7 @@
dfsrr;
eeprom: at24@50 {
- compatible = "st,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
@@ -130,7 +130,7 @@
};
spd: at24@51 {
- compatible = "at24,spd";
+ compatible = "atmel,spd";
reg = <0x51>;
};
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index e32613963ab0..5e85d8c93bca 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -150,7 +150,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
index c0c790168b96..fee15fcbb46f 100644
--- a/arch/powerpc/boot/dts/mpc8377_wlan.dts
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -135,7 +135,7 @@
dfsrr;
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index 71842fcd621f..e973d61956b9 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -150,7 +150,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index e442a29b2fe0..ed5d12ff2ee0 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -148,7 +148,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/pcm030.dts b/arch/powerpc/boot/dts/pcm030.dts
index 192e66af0001..836e47cc4bed 100644
--- a/arch/powerpc/boot/dts/pcm030.dts
+++ b/arch/powerpc/boot/dts/pcm030.dts
@@ -71,7 +71,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts
index 96b139bf50e9..576249bf2fb9 100644
--- a/arch/powerpc/boot/dts/pcm032.dts
+++ b/arch/powerpc/boot/dts/pcm032.dts
@@ -75,7 +75,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
diff --git a/arch/powerpc/boot/dts/pdm360ng.dts b/arch/powerpc/boot/dts/pdm360ng.dts
index 0cec7244abe7..445b88114009 100644
--- a/arch/powerpc/boot/dts/pdm360ng.dts
+++ b/arch/powerpc/boot/dts/pdm360ng.dts
@@ -13,7 +13,7 @@
* option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "pdm360ng";
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index b1d329246b08..e41b88a5eaee 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -229,7 +229,7 @@
};
partition@84000 {
label = "user";
- reg = <0x00000000 0x01f7c000>;
+ reg = <0x00084000 0x01f7c000>;
};
};
};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index e576ee85c42f..ea9053ef4819 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -238,7 +238,7 @@
/* This will create 52 and 53 */
at24@52 {
- compatible = "at,24c04";
+ compatible = "atmel,24c04";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index b03373d8b386..68e388ee94fe 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -67,13 +67,15 @@
#define MSR_LE 0x0000000000000001
#define FIXUP_ENDIAN \
- tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b $+44; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
new file mode 100644
index 000000000000..e8e6a6999852
--- /dev/null
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -0,0 +1,126 @@
+CONFIG_44x=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_FSP2=y
+CONFIG_476FPE_ERR46=y
+CONFIG_SWIOTLB=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="ip=on rw"
+# CONFIG_SUSPEND is not set
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_IBM_EMAC=m
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_BOOKE_WDT=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_MMC=y
+CONFIG_MMC_DEBUG=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_DEFAULT="n"
+CONFIG_XZ_DEC=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index c0deafc212b8..25d42bd3f114 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,6 +74,11 @@ do { \
___p1; \
})
+/*
+ * This must resolve to hwsync on SMP for the context switch path.
+ * See _switch, and core scheduler context switch memory ordering
+ * comments.
+ */
#define smp_mb__before_spinlock() smp_mb()
#include <asm-generic/barrier.h>
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 33a24fdd7958..b750ffef83c7 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -206,68 +206,13 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
* Return the zero-based bit position (LE, not IBM bit numbering) of
* the most significant 1-bit in a double word.
*/
-static __inline__ __attribute__((const))
-int __ilog2(unsigned long x)
-{
- int lz;
+#define __ilog2(x) ilog2(x)
- asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
- return BITS_PER_LONG - 1 - lz;
-}
+#include <asm-generic/bitops/ffz.h>
-static inline __attribute__((const))
-int __ilog2_u32(u32 n)
-{
- int bit;
- asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
- return 31 - bit;
-}
+#include <asm-generic/bitops/builtin-__ffs.h>
-#ifdef __powerpc64__
-static inline __attribute__((const))
-int __ilog2_u64(u64 n)
-{
- int bit;
- asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
- return 63 - bit;
-}
-#endif
-
-/*
- * Determines the bit position of the least significant 0 bit in the
- * specified double word. The returned bit position will be
- * zero-based, starting from the right side (63/31 - 0).
- */
-static __inline__ unsigned long ffz(unsigned long x)
-{
- /* no zero exists anywhere in the 8 byte area. */
- if ((x = ~x) == 0)
- return BITS_PER_LONG;
-
- /*
- * Calculate the bit position of the least significant '1' bit in x
- * (since x has been changed this will actually be the least significant
- * '0' bit in * the original x). Note: (x & -x) gives us a mask that
- * is the least significant * (RIGHT-most) 1-bit of the value in x.
- */
- return __ilog2(x & -x);
-}
-
-static __inline__ unsigned long __ffs(unsigned long x)
-{
- return __ilog2(x & -x);
-}
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
- unsigned long i = (unsigned long)x;
- return __ilog2(i & -i) + 1;
-}
+#include <asm-generic/bitops/builtin-ffs.h>
/*
* fls: find last (most-significant) bit set.
@@ -275,33 +220,15 @@ static __inline__ int ffs(int x)
*/
static __inline__ int fls(unsigned int x)
{
- int lz;
-
- asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
- return 32 - lz;
+ return 32 - __builtin_clz(x);
}
-static __inline__ unsigned long __fls(unsigned long x)
-{
- return __ilog2(x);
-}
+#include <asm-generic/bitops/builtin-__fls.h>
-/*
- * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
- * instruction; for 32-bit we use the generic version, which does two
- * 32-bit fls calls.
- */
-#ifdef __powerpc64__
static __inline__ int fls64(__u64 x)
{
- int lz;
-
- asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
- return 64 - lz;
+ return 64 - __builtin_clzll(x);
}
-#else
-#include <asm-generic/bitops/fls64.h>
-#endif /* __powerpc64__ */
#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);
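
The net effect of the bitops.h hunks above is that hand-written cntlzw/cntlzd inline assembly is replaced by compiler builtins, which GCC lowers to the same single instructions on POWER. A minimal sketch (not part of the patch) of what the new definitions compile to:

    static inline int fls_sketch(unsigned int x)
    {
            return 32 - __builtin_clz(x);           /* GCC emits: cntlzw */
    }

    static inline int fls64_sketch(unsigned long long x)
    {
            return 64 - __builtin_clzll(x);         /* GCC emits: cntlzd */
    }

Strictly, __builtin_clz(0) is undefined in C; fls(0) == 0 relies on the cntlzw instruction returning 32 for a zero input, which is what the generated code does on powerpc.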
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index d310546e5d9d..a120e7f8d535 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -31,7 +31,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 26ed228d4dc6..7fb755880409 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -297,6 +297,8 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 4e957b027fe0..0ce513f2926f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -89,6 +89,9 @@ static inline int hash__pgd_bad(pgd_t pgd)
{
return (pgd_val(pgd) == 0);
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void hash__mark_rodata_ro(void);
+#endif
extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge);
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index cd5e7aa8cc34..20b1485ff1e8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -53,10 +53,11 @@ extern void __tlb_remove_table(void *_table);
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
{
#ifdef CONFIG_PPC_64K_PAGES
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
#else
struct page *page;
- page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4);
+ page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+ 4);
if (!page)
return NULL;
return (pgd_t *) page_address(page);
@@ -76,7 +77,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,7 +95,8 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -119,7 +122,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -168,7 +172,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
pte_t *pte;
- pte = pte_alloc_one_kernel(mm, address);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
if (!pte)
return NULL;
page = virt_to_page(pte);
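
The repeated pgtable_gfp_flags(mm, ...) conversions in the pgalloc headers all feed a small helper this series adds to asm/pgalloc.h (the 14-line change in the diffstat). A paraphrased sketch, assuming the series' definition: user page-table allocations gain __GFP_ACCOUNT so they are charged to the kmem cgroup, while kernel (init_mm) page tables keep the plain flags:

    static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
    {
            /* init_mm allocations are kernel page tables: never account them */
            if (mm == &init_mm)
                    return gfp;
            return gfp | __GFP_ACCOUNT;     /* charge user page tables to memcg */
    }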
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 85bc9875c3be..c0737c86a362 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -5,6 +5,7 @@
#ifndef __ASSEMBLY__
#include <linux/mmdebug.h>
+#include <linux/bug.h>
#endif
/*
@@ -79,6 +80,9 @@
#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+#define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */
+#define __HAVE_ARCH_PTE_DEVMAP
+
/*
* Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
* Instead of fixing all of them, add an alternate define which
@@ -599,6 +603,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+static inline pte_t pte_mkdevmap(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+}
+
+static inline int pte_devmap(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
+}
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
/* FIXME!! check whether this need to be a conditional */
@@ -1146,6 +1160,37 @@ static inline bool arch_needs_pgtable_deposit(void)
return true;
}
+
+static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+static inline int pmd_devmap(pmd_t pmd)
+{
+ return pte_devmap(pmd_pte(pmd));
+}
+
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline const int pud_pfn(pud_t pud)
+{
+ /*
+ * Currently all calls to pud_pfn() are gated around a pud_devmap()
+ * check so this should never be used. If it grows another user we
+ * want to know about it.
+ */
+ BUILD_BUG();
+ return 0;
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
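
A hypothetical caller sketch (not from the patch) of how the new devmap helpers compose with the existing special-page test. Because pte_mkdevmap() sets _PAGE_SPECIAL as well as _PAGE_DEVMAP, a page-table walker that cares about ZONE_DEVICE mappings must test pte_devmap() before pte_special():

    static int classify_pte(pte_t pte)
    {
            if (pte_devmap(pte))
                    return 2;       /* ZONE_DEVICE mapping: needs a dev_pagemap ref */
            if (pte_special(pte))
                    return 1;       /* special mapping: no struct page to pin */
            return 0;               /* ordinary page */
    }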
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index ac16d1943022..487709ff6875 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -116,6 +116,10 @@
#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void radix__mark_rodata_ro(void);
+#endif
+
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
unsigned long set)
{
@@ -252,7 +256,7 @@ static inline int radix__pgd_bad(pgd_t pgd)
static inline int radix__pmd_trans_huge(pmd_t pmd)
{
- return !!(pmd_val(pmd) & _PAGE_PTE);
+ return (pmd_val(pmd) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
}
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index abef812de7f8..5482928eea1b 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -83,8 +83,16 @@ static inline unsigned long ppc_function_entry(void *func)
* On PPC64 ABIv1 the function pointer actually points to the
* function's descriptor. The first entry in the descriptor is the
* address of the function text.
+ *
+ * However, we may also receive pointer to an assembly symbol. To
+ * detect that, we first check if the function pointer we receive
+ * already points to kernel/module text and we only dereference it
+ * if it doesn't.
*/
- return ((func_descr_t *)func)->entry;
+ if (kernel_text_address((unsigned long)func))
+ return (unsigned long)func;
+ else
+ return ((func_descr_t *)func)->entry;
#else
return (unsigned long)func;
#endif
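
For reference, the ELFv1 descriptor that ppc_function_entry() dereferences is func_descr_t, defined in arch/powerpc/include/asm/types.h. Under ABIv1 a C function pointer points at this structure rather than at the code itself, which is why the helper reads ->entry, except when it is handed a raw assembly symbol that already lies in kernel text, the case the new kernel_text_address() check detects:

    typedef struct {
            unsigned long entry;    /* address of the function's text */
            unsigned long toc;      /* TOC base the function expects in r2 */
            unsigned long env;      /* environment pointer, unused by C code */
    } func_descr_t;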
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index f70cbfe0ec04..9f2ae0d25e15 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -56,6 +56,19 @@ static inline void ppc_msgsync(void)
: : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300));
}
+static inline void _ppc_msgclr(u32 msg)
+{
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0)
+ : : "i" (CPU_FTR_HVMODE), "r" (msg));
+}
+
+static inline void ppc_msgclr(enum ppc_dbell type)
+{
+ u32 msg = PPC_DBELL_TYPE(type);
+
+ _ppc_msgclr(msg);
+}
+
#else /* CONFIG_PPC_BOOK3S */
#define PPC_DBELL_MSGTYPE PPC_DBELL
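
An assumed usage sketch for the new helper (the caller below is hypothetical; within this merge the helper serves the system-reset/doorbell wakeup path): a CPU woken by a doorbell can clear the pending message so the doorbell exception does not fire a second time once interrupts are re-enabled:

    static void clear_stale_doorbell(void)
    {
            /* expands to msgclr or msgclrp depending on CPU_FTR_HVMODE */
            ppc_msgclr(PPC_DBELL_SERVER);
    }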
diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h
index 52e4d54da2a9..3df4417dd9c8 100644
--- a/arch/powerpc/include/asm/delay.h
+++ b/arch/powerpc/include/asm/delay.h
@@ -2,6 +2,7 @@
#define _ASM_POWERPC_DELAY_H
#ifdef __KERNEL__
+#include <linux/processor.h>
#include <asm/time.h>
/*
@@ -58,11 +59,18 @@ extern void udelay(unsigned long usecs);
typeof(condition) __ret; \
unsigned long __loops = tb_ticks_per_usec * timeout; \
unsigned long __start = get_tbl(); \
- while (!(__ret = (condition)) && (tb_ticks_since(__start) <= __loops)) \
- if (delay) \
+ \
+ if (delay) { \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
udelay(delay); \
- else \
- cpu_relax(); \
+ } else { \
+ spin_begin(); \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
+ spin_cpu_relax(); \
+ spin_end(); \
+ } \
if (!__ret) \
__ret = (condition); \
__ret; \
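
The new else-branch uses the spin-loop primitives from <linux/processor.h>, added elsewhere in this merge (see the diffstat): spin_begin() lowers the SMT thread priority once for the whole busy-wait and spin_end() restores it, rather than toggling priority on every iteration the way repeated cpu_relax() calls do. A minimal usage sketch with an assumed caller:

    #include <linux/processor.h>

    static void wait_for_flag(int *flag)
    {
            spin_begin();                   /* drop SMT priority once */
            while (!READ_ONCE(*flag))
                    spin_cpu_relax();       /* stay low-priority while polling */
            spin_end();                     /* restore SMT priority */
    }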
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 183d73b6ed99..9a318973af05 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -36,20 +36,38 @@
*/
#include <asm/head-64.h>
+/* PACA save area offsets (exgen, exmc, etc) */
#define EX_R9 0
#define EX_R10 8
#define EX_R11 16
#define EX_R12 24
#define EX_R13 32
-#define EX_SRR0 40
-#define EX_DAR 48
-#define EX_DSISR 56
-#define EX_CCR 60
-#define EX_R3 64
-#define EX_LR 72
-#define EX_CFAR 80
-#define EX_PPR 88 /* SMT thread status register (priority) */
-#define EX_CTR 96
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
+#define EX_CTR 72
+#define EX_SIZE 10 /* size in u64 units */
+#else
+#define EX_SIZE 9 /* size in u64 units */
+#endif
+
+/*
+ * EX_LR is only used in EXSLB, where it does not overlap with EX_DAR.
+ * EX_CCR could similarly overlap with EX_DSISR, but as they are 4 byte
+ * registers there is a hole in the save area, so overlapping them is not
+ * necessary. The hole could be used for future savings if another 4 byte
+ * register had to be saved.
+ */
+#define EX_LR EX_DAR
+
+/*
+ * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
+ * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
+ * with EX_DAR.
+ */
+#define EX_R3 EX_DAR
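
With the packing above, the largest slot ends at EX_PPR + 8 = 72 bytes, or EX_CTR + 8 = 80 bytes when CONFIG_RELOCATABLE adds EX_CTR, matching EX_SIZE of 9 and 10 u64s respectively. A hypothetical compile-time check (not part of the patch) would be:

    #ifdef CONFIG_RELOCATABLE
    _Static_assert(EX_CTR + 8 <= EX_SIZE * 8, "EX_CTR must fit the save area");
    #else
    _Static_assert(EX_PPR + 8 <= EX_SIZE * 8, "EX_PPR must fit the save area");
    #endif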
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
@@ -236,6 +254,19 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
+/*
+ * Branch to label using its 0xC000 address. This results in instruction
+ * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
+ * on using mtmsr rather than rfid.
+ *
+ * For !RELOCATABLE this could set the 0xc bits as an immediate, rather than
+ * loading KBASE, as a slight optimisation.
+ */
+#define BRANCH_TO_C000(reg, label) \
+ __LOAD_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
#ifdef CONFIG_RELOCATABLE
#define BRANCH_TO_COMMON(reg, label) \
__LOAD_HANDLER(reg, label); \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 60b91084f33c..ce88bbe1d809 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -43,6 +43,9 @@
#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
+/* The upper limit percentage for user-specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
/* Firmware provided dump sections */
@@ -200,6 +203,7 @@ struct fad_crash_memory_ranges {
unsigned long long size;
};
+extern int is_fadump_boot_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 86eb87382031..d81eac5b509f 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -3,6 +3,7 @@
#include <asm/cache.h>
+#ifdef __ASSEMBLY__
/*
* We can't do CPP stringification and concatenation directly into the section
* name for some reason, so these macros can do it for us.
@@ -49,8 +50,8 @@
* CLOSE_FIXED_SECTION() or elsewhere, there may be something
* unexpected being added there. Remove the '. = x_len' line, rebuild, and
* check what is pushing the section down.
- * - If the build dies in linking, check arch/powerpc/kernel/vmlinux.lds.S
- * for instructions.
+ * - If the build dies in linking, check arch/powerpc/tools/head_check.sh
+ * comments.
* - If the kernel crashes or hangs in very early boot, it could be linker
* stubs at the start of the main text.
*/
@@ -63,11 +64,29 @@
. = 0x0; \
start_##sname:
+/*
+ * The .linker_stub_catch section is used to catch linker stubs so they are
+ * not inserted in our .text section, above the start_text label (which breaks
+ * the ABS_ADDR calculation). See kernel/vmlinux.lds.S and tools/head_check.sh
+ * for more details. We would prefer to just keep a cacheline (0x80), but
+ * 0x100 seems to be how the linker aligns branch stub groups.
+ */
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+#define OPEN_TEXT_SECTION(start) \
+ .section ".linker_stub_catch","ax",@progbits; \
+linker_stub_catch: \
+ . = 0x4; \
+ text_start = (start) + 0x100; \
+ .section ".text","ax",@progbits; \
+ .balign 0x100; \
+start_text:
+#else
#define OPEN_TEXT_SECTION(start) \
text_start = (start); \
.section ".text","ax",@progbits; \
. = 0x0; \
start_text:
+#endif
#define ZERO_FIXED_SECTION(sname, start, end) \
sname##_start = (start); \
@@ -397,4 +416,6 @@ name:
EXC_COMMON_BEGIN(name); \
STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index eba60416536e..c1dd1929342d 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -129,6 +129,10 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
}
extern bool prep_irq_for_idle(void);
+extern bool prep_irq_for_idle_irqsoff(void);
+extern void irq_set_pending_from_srr1(unsigned long srr1);
+
+#define fini_irq_for_idle_irqsoff() trace_hardirqs_off();
extern void force_external_irq_replay(void);
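
A hedged sketch of how these helpers presumably pair up on the idle entry path (srr1 and psscr are assumed locals; the ordering is inferred from the declarations, not verbatim kernel code):

    if (!prep_irq_for_idle_irqsoff())
            return;                         /* an irq is pending: abort idle */
    srr1 = power9_idle_stop(psscr);         /* enter the stop state */
    irq_set_pending_from_srr1(srr1);        /* turn wakeup reason into an irq */
    fini_irq_for_idle_irqsoff();            /* mark irqs off again for tracing */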
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index f90b22c722e1..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -226,6 +226,7 @@ struct machdep_calls {
extern void e500_idle(void);
extern void power4_idle(void);
extern void power7_idle(void);
+extern void power9_idle(void);
extern void ppc6xx_idle(void);
extern void book3e_idle(void);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 81eff8631434..190d69a7f701 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -90,13 +90,14 @@ enum MCE_UserErrorType {
enum MCE_RaErrorType {
MCE_RA_ERROR_INDETERMINATE = 0,
MCE_RA_ERROR_IFETCH = 1,
- MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
- MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
- MCE_RA_ERROR_LOAD = 4,
- MCE_RA_ERROR_STORE = 5,
- MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
- MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
- MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+ MCE_RA_ERROR_IFETCH_FOREIGN = 2,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4,
+ MCE_RA_ERROR_LOAD = 5,
+ MCE_RA_ERROR_STORE = 6,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8,
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9,
};
enum MCE_LinkErrorType {
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 633139291a48..cc369a70f2bb 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -31,7 +31,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 5134ade2e850..91314268f04f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -340,6 +340,8 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 897d2e1c8a9b..9721c7867b9c 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -43,7 +43,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -57,7 +58,8 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -96,7 +98,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
pte_t *pte;
- pte = pte_alloc_one_kernel(mm, address);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
if (!pte)
return NULL;
page = virt_to_page(pte);
@@ -189,7 +191,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index cb3e6242a78c..ef930ba500f9 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -667,12 +667,14 @@ enum {
enum {
OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
- OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
+ OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2,
+ OPAL_PHB_ERROR_DATA_TYPE_PHB4 = 3
};
enum {
OPAL_P7IOC_NUM_PEST_REGS = 128,
- OPAL_PHB3_NUM_PEST_REGS = 256
+ OPAL_PHB3_NUM_PEST_REGS = 256,
+ OPAL_PHB4_NUM_PEST_REGS = 512
};
struct OpalIoPhbErrorCommon {
@@ -802,6 +804,75 @@ struct OpalIoPhb3ErrorData {
__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
};
+struct OpalIoPhb4ErrorData {
+ struct OpalIoPhbErrorCommon common;
+
+ __be32 brdgCtl;
+
+ /* PHB4 cfg regs */
+ __be32 deviceStatus;
+ __be32 slotStatus;
+ __be32 linkStatus;
+ __be32 devCmdStatus;
+ __be32 devSecStatus;
+
+ /* cfg AER regs */
+ __be32 rootErrorStatus;
+ __be32 uncorrErrorStatus;
+ __be32 corrErrorStatus;
+ __be32 tlpHdr1;
+ __be32 tlpHdr2;
+ __be32 tlpHdr3;
+ __be32 tlpHdr4;
+ __be32 sourceId;
+
+ /* PHB4 ETU Error Regs */
+ __be64 nFir; /* 000 */
+ __be64 nFirMask; /* 003 */
+ __be64 nFirWOF; /* 008 */
+ __be64 phbPlssr; /* 120 */
+ __be64 phbCsr; /* 110 */
+ __be64 lemFir; /* C00 */
+ __be64 lemErrorMask; /* C18 */
+ __be64 lemWOF; /* C40 */
+ __be64 phbErrorStatus; /* C80 */
+ __be64 phbFirstErrorStatus; /* C88 */
+ __be64 phbErrorLog0; /* CC0 */
+ __be64 phbErrorLog1; /* CC8 */
+ __be64 phbTxeErrorStatus; /* D00 */
+ __be64 phbTxeFirstErrorStatus; /* D08 */
+ __be64 phbTxeErrorLog0; /* D40 */
+ __be64 phbTxeErrorLog1; /* D48 */
+ __be64 phbRxeArbErrorStatus; /* D80 */
+ __be64 phbRxeArbFirstErrorStatus; /* D88 */
+ __be64 phbRxeArbErrorLog0; /* DC0 */
+ __be64 phbRxeArbErrorLog1; /* DC8 */
+ __be64 phbRxeMrgErrorStatus; /* E00 */
+ __be64 phbRxeMrgFirstErrorStatus; /* E08 */
+ __be64 phbRxeMrgErrorLog0; /* E40 */
+ __be64 phbRxeMrgErrorLog1; /* E48 */
+ __be64 phbRxeTceErrorStatus; /* E80 */
+ __be64 phbRxeTceFirstErrorStatus; /* E88 */
+ __be64 phbRxeTceErrorLog0; /* EC0 */
+ __be64 phbRxeTceErrorLog1; /* EC8 */
+
+ /* PHB4 REGB Error Regs */
+ __be64 phbPblErrorStatus; /* 1900 */
+ __be64 phbPblFirstErrorStatus; /* 1908 */
+ __be64 phbPblErrorLog0; /* 1940 */
+ __be64 phbPblErrorLog1; /* 1948 */
+ __be64 phbPcieDlpErrorLog1; /* 1AA0 */
+ __be64 phbPcieDlpErrorLog2; /* 1AA8 */
+ __be64 phbPcieDlpErrorStatus; /* 1AB0 */
+ __be64 phbRegbErrorStatus; /* 1C00 */
+ __be64 phbRegbFirstErrorStatus; /* 1C08 */
+ __be64 phbRegbErrorLog0; /* 1C40 */
+ __be64 phbRegbErrorLog1; /* 1C48 */
+
+ __be64 pestA[OPAL_PHB4_NUM_PEST_REGS];
+ __be64 pestB[OPAL_PHB4_NUM_PEST_REGS];
+};
+
enum {
OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
@@ -877,6 +948,7 @@ enum {
OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2,
OPAL_PHB_CAPI_MODE_SNOOP_ON = 3,
OPAL_PHB_CAPI_MODE_DMA = 4,
+ OPAL_PHB_CAPI_MODE_DMA_TVT1 = 5,
};
/* OPAL I2C request */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1c09f8fe2ee8..dc88a31cc79a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -21,7 +21,11 @@
#include <asm/lppaca.h>
#include <asm/mmu.h>
#include <asm/page.h>
+#ifdef CONFIG_PPC_BOOK3E
#include <asm/exception-64e.h>
+#else
+#include <asm/exception-64s.h>
+#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_asm.h>
#endif
@@ -98,8 +102,8 @@ struct paca_struct {
* Now, starting in cacheline 2, the exception save areas
*/
/* used for most interrupts/exceptions */
- u64 exgen[13] __attribute__((aligned(0x80)));
- u64 exslb[13]; /* used for SLB/segment table misses
+ u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+ u64 exslb[EX_SIZE]; /* used for SLB/segment table misses
* on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
@@ -177,12 +181,14 @@ struct paca_struct {
* to the sibling threads' paca.
*/
struct paca_struct **thread_sibling_pacas;
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
#endif
#ifdef CONFIG_PPC_STD_MMU_64
/* Non-maskable exceptions that are not performance critical */
- u64 exnmi[13]; /* used for system reset (nmi) */
- u64 exmc[13]; /* used for machine checks */
+ u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
+ u64 exmc[EX_SIZE]; /* used for machine checks */
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/* Exclusive stacks for system reset and machine check exception. */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 0413457ba11d..d795c5d5789c 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -3,6 +3,20 @@
#include <linux/mm.h>
+#ifndef MODULE
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+ if (unlikely(mm == &init_mm))
+ return gfp;
+ return gfp | __GFP_ACCOUNT;
+}
+#else /* !MODULE */
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+ return gfp | __GFP_ACCOUNT;
+}
+#endif /* MODULE */
+
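
The design choice here: page tables allocated for user processes carry __GFP_ACCOUNT so they are charged to the task's memcg kernel-memory counters, while init_mm (kernel page tables) stays exempt; the MODULE variant drops the init_mm comparison because init_mm is not accessible to modules. An assumed call site, in the style of the pgd_alloc() hunks above:

    pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
                           pgtable_gfp_flags(mm, GFP_KERNEL));
    /* mm == &init_mm -> plain GFP_KERNEL (not memcg-charged)
     * user mm        -> GFP_KERNEL | __GFP_ACCOUNT               */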
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a9b45198c06..fa9ebaead91e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -191,8 +191,7 @@
/* sorted alphabetically */
#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_CLRBHRB 0x7c00035c
-#define PPC_INST_COPY 0x7c00060c
-#define PPC_INST_COPY_FIRST 0x7c20060c
+#define PPC_INST_COPY 0x7c20060c
#define PPC_INST_CP_ABORT 0x7c00068c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
@@ -223,10 +222,10 @@
#define PPC_INST_MSGCLR 0x7c0001dc
#define PPC_INST_MSGSYNC 0x7c0006ec
#define PPC_INST_MSGSNDP 0x7c00011c
+#define PPC_INST_MSGCLRP 0x7c00015c
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
-#define PPC_INST_PASTE 0x7c00070c
-#define PPC_INST_PASTE_LAST 0x7c20070d
+#define PPC_INST_PASTE 0x7c20070d
#define PPC_INST_POPCNTB 0x7c0000f4
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
#define PPC_INST_POPCNTD 0x7c0003f4
@@ -394,6 +393,8 @@
/* Deal with instructions that older assemblers aren't aware of */
#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT)
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
+ ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
@@ -411,6 +412,8 @@
___PPC_RB(b))
#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
___PPC_RB(b))
+#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \
+ ___PPC_RB(b))
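
A worked expansion of the new PPC_MSGCLRP helper, assuming ___PPC_RB(b) is (((b) & 0x1f) << 11) as for the other raw-opcode macros in this header:

    /* PPC_MSGCLRP(3) stringifies to:
     *     .long 0x7c00015c | (((3) & 0x1f) << 11)
     * so the assembler emits the raw word 0x7c00195c (msgclrp r3),
     * which builds even on assemblers that lack the mnemonic. */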
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
__PPC_RA(a) | __PPC_RS(s))
#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 359c44341761..6baeeb9acd0d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -770,15 +770,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#else
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ b $+44; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
+
#endif /* !CONFIG_PPC_BOOK3E */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 1189d04f3bd1..fab7ff877304 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -421,6 +421,26 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#ifdef CONFIG_PPC64
#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
+
+#define spin_begin() HMT_low()
+
+#define spin_cpu_relax() barrier()
+
+#define spin_cpu_yield() spin_cpu_relax()
+
+#define spin_end() HMT_medium()
+
+#define spin_until_cond(cond) \
+do { \
+ if (unlikely(!(cond))) { \
+ spin_begin(); \
+ do { \
+ spin_cpu_relax(); \
+ } while (!(cond)); \
+ spin_end(); \
+ } \
+} while (0)
+
#else
#define cpu_relax() barrier()
#endif
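
A minimal usage sketch of the new spin API (the ready flag is hypothetical): spin_begin() drops SMT priority once, spin_cpu_relax() is the cheap per-iteration body, and spin_end() restores priority; spin_until_cond() bundles the whole pattern:

    static volatile int ready;              /* assumed: set by another thread */

    static void wait_for_ready(void)
    {
            spin_until_cond(READ_ONCE(ready) != 0);
    }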
@@ -474,11 +494,11 @@ extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_nap(int check_irq);
-extern unsigned long power7_sleep(void);
-extern unsigned long power7_winkle(void);
-extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask);
+extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc. */
+extern void power7_idle_type(unsigned long type);
+extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern void power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask);
extern void flush_instruction_cache(void);
extern void hard_reset_now(void);
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index c05cef6ee06c..18f168aebae3 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -168,6 +168,39 @@ TRACE_EVENT(hash_fault,
__entry->addr, __entry->access, __entry->trap)
);
+
+TRACE_EVENT(tlbie,
+
+ TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb,
+ unsigned long rs, unsigned long ric, unsigned long prs,
+ unsigned long r),
+ TP_ARGS(lpid, local, rb, rs, ric, prs, r),
+ TP_STRUCT__entry(
+ __field(unsigned long, lpid)
+ __field(unsigned long, local)
+ __field(unsigned long, rb)
+ __field(unsigned long, rs)
+ __field(unsigned long, ric)
+ __field(unsigned long, prs)
+ __field(unsigned long, r)
+ ),
+
+ TP_fast_assign(
+ __entry->lpid = lpid;
+ __entry->local = local;
+ __entry->rb = rb;
+ __entry->rs = rs;
+ __entry->ric = ric;
+ __entry->prs = prs;
+ __entry->r = r;
+ ),
+
+ TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, "
+ "prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local,
+ __entry->rb, __entry->rs, __entry->ric, __entry->prs,
+ __entry->r)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
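
A hedged sketch of firing the new tracepoint from a tlbie call site; rb and rs are assumed locals holding the raw operands, and the RIC/PRS/R values are illustrative only:

    trace_tlbie(0,          /* lpid */
                1,          /* local: tlbiel rather than tlbie */
                rb, rs,     /* raw RB/RS operand values */
                2,          /* ric */
                1,          /* prs */
                1);         /* r: radix format */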
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index b15bf6bc0e94..0d960ef78a9a 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,2 +1,8 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sockios.h
+generic-y += statfs.h
diff --git a/arch/powerpc/include/uapi/asm/param.h b/arch/powerpc/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/powerpc/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/powerpc/include/uapi/asm/poll.h b/arch/powerpc/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/powerpc/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/powerpc/include/uapi/asm/resource.h b/arch/powerpc/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/powerpc/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/powerpc/include/uapi/asm/sockios.h b/arch/powerpc/include/uapi/asm/sockios.h
deleted file mode 100644
index 55cef7675a31..000000000000
--- a/arch/powerpc/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_POWERPC_SOCKIOS_H
-#define _ASM_POWERPC_SOCKIOS_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_POWERPC_SOCKIOS_H */
diff --git a/arch/powerpc/include/uapi/asm/statfs.h b/arch/powerpc/include/uapi/asm/statfs.h
deleted file mode 100644
index 5244834583a4..000000000000
--- a/arch/powerpc/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_POWERPC_STATFS_H
-#define _ASM_POWERPC_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index e132902e1f14..0845eebc5af3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-# timers used by tracing
-CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
obj-y := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ae8e89e0d083..6e95c2c19a7e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -100,12 +100,12 @@ int main(void)
OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
#endif
OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
- OFFSET(THREAD_FPSTATE, thread_struct, fp_state);
+ OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
#ifdef CONFIG_ALTIVEC
- OFFSET(THREAD_VRSTATE, thread_struct, vr_state);
+ OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
OFFSET(THREAD_USED_VR, thread_struct, used_vr);
@@ -145,9 +145,9 @@ int main(void)
OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
- OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state);
+ OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
- OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state);
+ OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
/* Local pt_regs on stack for Transactional Memory funcs. */
DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
sizeof(struct pt_regs) + 16);
@@ -745,9 +745,11 @@ int main(void)
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
+ OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+ DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
#ifdef CONFIG_PPC_8xx
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bfbad08a1207..49d8422767b4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -57,7 +57,7 @@ system_call_common:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne tabort_syscall
+ bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
andi. r10,r12,MSR_PR
@@ -143,6 +143,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
+system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
@@ -152,11 +153,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne syscall_dotrace /* does not return */
+ bne .Lsyscall_dotrace /* does not return */
cmpldi 0,r0,NR_syscalls
- bge- syscall_enosys
+ bge- .Lsyscall_enosys
-system_call: /* label this so stack traces look sane */
+.Lsyscall:
/*
* Need to vector to 32 Bit or default sys_call_table here,
* based on caller's run-mode / personality.
@@ -185,8 +186,20 @@ system_call: /* label this so stack traces look sane */
#ifdef CONFIG_PPC_BOOK3S
/* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
- beq- unrecov_restore
+ beq- .Lunrecov_restore
#endif
+
+/*
+ * This is a few instructions into the actual syscall exit path (which starts
+ * at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
+ * number of visible symbols for profiling purposes.
+ *
+ * We can probe from system_call until this point as MSR_RI is set. But once it
+ * is cleared below, we won't be able to take a trap.
+ *
+ * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
+ */
+system_call_exit:
/*
* Disable interrupts so current_thread_info()->flags can't change,
* and so that we don't get interrupted after loading SRR0/1.
@@ -208,31 +221,21 @@ system_call: /* label this so stack traces look sane */
ld r9,TI_FLAGS(r12)
li r11,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
+ bne- .Lsyscall_exit_work
- andi. r0,r8,MSR_FP
- beq 2f
+ /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
+ li r7,MSR_FP
#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
+ oris r7,r7,MSR_VEC@h
#endif
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
+ and r0,r8,r7
+ cmpd r0,r7
+ bne .Lsyscall_restore_math
+.Lsyscall_restore_math_cont:
-3: cmpld r3,r11
+ cmpld r3,r11
ld r5,_CCR(r1)
- bge- syscall_error
+ bge- .Lsyscall_error
.Lsyscall_error_cont:
ld r7,_NIP(r1)
BEGIN_FTR_SECTION
@@ -258,14 +261,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI
b . /* prevent speculative execution */
-syscall_error:
+.Lsyscall_error:
oris r5,r5,0x1000 /* Set SO bit in CR */
neg r3,r3
std r5,_CCR(r1)
b .Lsyscall_error_cont
-
+
+.Lsyscall_restore_math:
+ /*
+ * Some initial tests from restore_math to avoid the heavyweight
+ * C code entry and MSR manipulations.
+ */
+ LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
+ and. r0,r0,r8
+ bne 1f
+
+ ld r7,PACACURRENT(r13)
+ lbz r0,THREAD+THREAD_LOAD_FP(r7)
+#ifdef CONFIG_ALTIVEC
+ lbz r6,THREAD+THREAD_LOAD_VEC(r7)
+ add r0,r0,r6
+#endif
+ cmpdi r0,0
+ beq .Lsyscall_restore_math_cont
+
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ /* Restore volatiles, reload MSR from updated one */
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
+ b .Lsyscall_restore_math_cont
+
/* Traced system call support */
-syscall_dotrace:
+.Lsyscall_dotrace:
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_enter
@@ -286,23 +323,23 @@ syscall_dotrace:
ld r7,GPR7(r1)
ld r8,GPR8(r1)
- /* Repopulate r9 and r10 for the system_call path */
+ /* Repopulate r9 and r10 for the syscall path */
addi r9,r1,STACK_FRAME_OVERHEAD
CURRENT_THREAD_INFO(r10, r1)
ld r10,TI_FLAGS(r10)
cmpldi r0,NR_syscalls
- blt+ system_call
+ blt+ .Lsyscall
/* Return code is already in r3 thanks to do_syscall_trace_enter() */
b .Lsyscall_exit
-syscall_enosys:
+.Lsyscall_enosys:
li r3,-ENOSYS
b .Lsyscall_exit
-syscall_exit_work:
+.Lsyscall_exit_work:
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
@@ -362,7 +399,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
b ret_from_except
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-tabort_syscall:
+.Ltabort_syscall:
/* Firstly we need to enable TM in the kernel */
mfmsr r10
li r9, 1
@@ -388,6 +425,8 @@ tabort_syscall:
rfid
b . /* prevent speculative execution */
#endif
+_ASM_NOKPROBE_SYMBOL(system_call_common);
+_ASM_NOKPROBE_SYMBOL(system_call_exit);
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
@@ -398,6 +437,7 @@ _GLOBAL(save_nvgprs)
clrrdi r0,r11,1
std r0,_TRAP(r1)
blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
/*
@@ -488,33 +528,30 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
-#ifdef CONFIG_SMP
- /* We need a sync somewhere here to make sure that if the
- * previous task gets rescheduled on another CPU, it sees all
- * stores it has performed on this one.
+ /*
+ * On SMP kernels, care must be taken because a task may be
+ * scheduled off CPUx and on to CPUy. Memory ordering must be
+ * considered.
+ *
+ * Cacheable stores on CPUx will be visible when the task is
+ * scheduled on CPUy by virtue of the core scheduler barriers
+ * (see "Notes on Program-Order guarantees on SMP systems." in
+ * kernel/sched/core.c).
+ *
+ * Uncacheable stores in the case of involuntary preemption must
+ * be taken care of. The smp_mb__before_spin_lock() in __schedule()
+ * is implemented as hwsync on powerpc, which orders MMIO too. So
+ * long as there is an hwsync in the context switch path, it will
+ * be executed on the source CPU after the task has performed
+ * all MMIO ops on that CPU, and on the destination CPU before the
+ * task performs any MMIO ops there.
*/
- sync
-#endif /* CONFIG_SMP */
/*
- * If we optimise away the clear of the reservation in system
- * calls because we know the CPU tracks the address of the
- * reservation, then we need to clear it here to cover the
- * case that the kernel context switch path has no larx
- * instructions.
+ * The kernel context switch path must contain a spin_lock,
+ * which contains larx/stcx, which will clear any reservation
+ * of the task being switched.
*/
-BEGIN_FTR_SECTION
- ldarx r6,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-BEGIN_FTR_SECTION
-/*
- * A cp_abort (copy paste abort) here ensures that when context switching, a
- * copy from one process can't leak into the paste of another.
- */
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
#ifdef CONFIG_PPC_BOOK3S
/* Cancel all explicit user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
@@ -583,6 +620,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
top of the kernel stack. */
addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+ /*
+ * PMU interrupts in radix may come in here. They will use r1, not
+ * PACAKSAVE, so this stack switch will not cause a problem. They
+ * will store to the process stack, which may then be migrated to
+ * another CPU. However the rq lock release on this CPU paired with
+ * the rq lock acquire on the new CPU before the stack becomes
+ * active on the new CPU, will order those stores.
+ */
mr r1,r8 /* start using new stack pointer */
std r7,PACAKSAVE(r13)
@@ -763,11 +808,11 @@ restore:
ld r5,SOFTE(r1)
lbz r6,PACASOFTIRQEN(r13)
cmpwi cr0,r5,0
- beq restore_irq_off
+ beq .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
cmpwi cr0,r6,1
- beq cr0,do_restore
+ beq cr0,.Ldo_restore
/*
* We are about to soft-enable interrupts (we are hard disabled
@@ -776,14 +821,14 @@ restore:
*/
lbz r0,PACAIRQHAPPENED(r13)
cmpwi cr0,r0,0
- bne- restore_check_irq_replay
+ bne- .Lrestore_check_irq_replay
/*
* Get here when nothing happened while soft-disabled, just
* soft-enable and move-on. We will hard-enable as a side
* effect of rfi
*/
-restore_no_replay:
+.Lrestore_no_replay:
TRACE_ENABLE_INTS
li r0,1
stb r0,PACASOFTIRQEN(r13);
@@ -791,7 +836,7 @@ restore_no_replay:
/*
* Final return path. BookE is handled in a different file
*/
-do_restore:
+.Ldo_restore:
#ifdef CONFIG_PPC_BOOK3E
b exception_return_book3e
#else
@@ -825,7 +870,7 @@ fast_exception_return:
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
- beq- unrecov_restore
+ beq- .Lunrecov_restore
/* Load PPR from thread struct before we clear MSR:RI */
BEGIN_FTR_SECTION
@@ -883,7 +928,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* make sure that in this case, we also clear PACA_IRQ_HARD_DIS
* or that bit can get out of sync and bad things will happen
*/
-restore_irq_off:
+.Lrestore_irq_off:
ld r3,_MSR(r1)
lbz r7,PACAIRQHAPPENED(r13)
andi. r0,r3,MSR_EE
@@ -893,13 +938,13 @@ restore_irq_off:
1: li r0,0
stb r0,PACASOFTIRQEN(r13);
TRACE_DISABLE_INTS
- b do_restore
+ b .Ldo_restore
/*
* Something did happen, check if a re-emit is needed
* (this also clears paca->irq_happened)
*/
-restore_check_irq_replay:
+.Lrestore_check_irq_replay:
/* XXX: We could implement a fast path here where we check
* for irq_happened being just 0x01, in which case we can
* clear it and return. That means that we would potentially
@@ -909,7 +954,7 @@ restore_check_irq_replay:
*/
bl __check_irq_replay
cmpwi cr0,r3,0
- beq restore_no_replay
+ beq .Lrestore_no_replay
/*
* We need to re-emit an interrupt. We do so by re-using our
@@ -958,10 +1003,18 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_DOORBELL */
1: b ret_from_except /* What else to do here ? */
-unrecov_restore:
+.Lunrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
bl unrecoverable_exception
- b unrecov_restore
+ b .Lunrecov_restore
+
+_ASM_NOKPROBE_SYMBOL(ret_from_except);
+_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
+_ASM_NOKPROBE_SYMBOL(resume_kernel);
+_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
+_ASM_NOKPROBE_SYMBOL(restore);
+_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+
#ifdef CONFIG_PPC_RTAS
/*
@@ -1038,6 +1091,8 @@ _GLOBAL(enter_rtas)
rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
andc r6,r0,r9
+
+__enter_rtas:
sync /* disable interrupts so SRR0/1 */
mtmsrd r0 /* don't get trashed */
@@ -1074,6 +1129,8 @@ rtas_return_loc:
mtspr SPRN_SRR1,r4
rfid
b . /* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(__enter_rtas)
+_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
.align 3
1: .llong rtas_restore_regs
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index b886795060fd..4c18a5fbb4bb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -99,7 +99,11 @@ EXC_VIRT_NONE(0x4000, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
* If running native on arch 2.06 or later, check if we are waking up
- * from nap/sleep/winkle, and branch to idle handler.
+ * from nap/sleep/winkle, and branch to idle handler. This tests SRR1
+ * bits 46:47. A non-0 value indicates that we are coming from a power
+ * saving state. The idle wakeup handler initially runs in real mode,
+ * but we branch to the 0xc000... address so we can turn on relocation
+ * with mtmsr.
*/
#define IDLETEST(n) \
BEGIN_FTR_SECTION ; \
@@ -107,7 +111,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
cmpwi cr3,r10,2 ; \
- BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \
+ BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#else
@@ -128,6 +132,7 @@ EXC_VIRT_NONE(0x4100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
+ mfspr r12,SPRN_SRR1
b pnv_powersave_wakeup
#endif
@@ -507,46 +512,22 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crset 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crset 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
@@ -575,88 +556,82 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crclr 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crclr 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
TRAMP_KVM(PACA_EXSLB, 0x480)
-/* This handler is used by both 0x380 and 0x480 slb miss interrupts */
-EXC_COMMON_BEGIN(slb_miss_realmode)
+/*
+ * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
+ * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
+ */
+EXC_COMMON_BEGIN(slb_miss_common)
/*
* r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r12 contains the saved r3,
+ * r11 contains the saved SRR1, SRR0 is still ready for return
* r3 has the faulting address
* r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
* cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
* We assume we aren't going to take any exceptions during this
* procedure.
*/
mflr r10
-#ifdef CONFIG_RELOCATABLE
- mtctr r11
-#endif
-
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
- std r3,PACA_EXSLB+EX_DAR(r13)
+
+ /*
+ * Test MSR_RI before calling slb_allocate, because the
+ * MSR in r11 gets clobbered. However we still want to allocate
+ * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
+ * recursive SLB faults. So use cr5 for this, which is preserved.
+ */
+ andi. r11,r11,MSR_RI /* check for unrecoverable exception */
+ cmpdi cr5,r11,MSR_RI
crset 4*cr0+eq
#ifdef CONFIG_PPC_STD_MMU_64
BEGIN_MMU_FTR_SECTION
- bl slb_allocate_realmode
+ bl slb_allocate
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq 8f /* if bad address, make full stack frame */
+ beq- 8f /* if bad address, make full stack frame */
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- 2f
+ bne- cr5,2f /* if unrecoverable exception, oops */
/* All done -- return from exception. */
.machine push
.machine "power4"
mtcrf 0x80,r9
+ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
.machine pop
+ RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ mr r3,r12
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
ld r11,PACA_EXSLB+EX_R11(r13)
@@ -665,7 +640,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
rfid
b . /* prevent speculative execution */
-2: mfspr r11,SPRN_SRR0
+2: std r3,PACA_EXSLB+EX_DAR(r13)
+ mr r3,r12
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,unrecov_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
@@ -673,7 +651,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
rfid
b .
-8: mfspr r11,SPRN_SRR0
+8: std r3,PACA_EXSLB+EX_DAR(r13)
+ mr r3,r12
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,bad_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
@@ -821,46 +802,80 @@ EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
TRAMP_KVM(PACA_EXGEN, 0xb00)
EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+/*
+ * system call / hypercall (0xc00, 0x4c00)
+ *
+ * The system call exception is invoked with "sc 0" and does not alter HV bit.
+ * There is support for kernel code to invoke system calls but there are no
+ * in-tree users.
+ *
+ * The hypercall is invoked with "sc 1" and sets HV=1.
+ *
+ * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
+ * 0x4c00 virtual mode.
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ *
+ * For hypercalls, the register convention is as follows:
+ * r0 volatile
+ * r1-2 nonvolatile
+ * r3 volatile parameter and return value for status
+ * r4-r10 volatile input and output value
+ * r11 volatile hypercall number and output value
+ * r12 volatile
+ * r13-r31 nonvolatile
+ * LR nonvolatile
+ * CTR volatile
+ * XER volatile
+ * CR0-1 CR5-7 volatile
+ * CR2-4 nonvolatile
+ * Other registers nonvolatile
+ *
+ * The intersection of volatile registers that don't contain possible
+ * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs
+ * upon entry without saving.
+ */
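
Purely as an illustration of the contract just listed, a hypothetical guest-side stub for a one-argument hcall; the clobber list mirrors the volatile registers above, and this is not an in-tree implementation:

    static long hcall1(unsigned long nr, unsigned long arg)
    {
            register unsigned long r11 asm("r11") = nr;  /* hcall number */
            register unsigned long r4 asm("r4") = arg;   /* first argument */
            register unsigned long r3 asm("r3");         /* status return */

            asm volatile("sc 1"
                         : "=r" (r3), "+r" (r4), "+r" (r11)
                         : : "r0", "r5", "r6", "r7", "r8", "r9", "r10",
                           "r12", "ctr", "xer", "cr0", "cr1", "cr5",
                           "cr6", "cr7", "memory");
            return r3;
    }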
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- /*
- * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
- * that support it) before changing to HMT_MEDIUM. That allows the KVM
- * code to save that value into the guest state (it is the guest's PPR
- * value). Otherwise just change to HMT_MEDIUM as userspace has
- * already saved the PPR.
- */
+ /*
+ * There is a little bit of juggling to get syscall and hcall
+ * working well. Save r10 in ctr to be restored in case it is an
+ * hcall.
+ *
+ * Userspace syscalls have already saved the PPR, hcalls must save
+ * it before setting HMT_MEDIUM.
+ */
#define SYSCALL_KVMTEST \
- SET_SCRATCH0(r13); \
+ mr r12,r13; \
GET_PACA(r13); \
- std r9,PACA_EXGEN+EX_R9(r13); \
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
+ mtctr r10; \
+ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
- std r10,PACA_EXGEN+EX_R10(r13); \
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \
- mfcr r9; \
- KVMTEST_PR(0xc00); \
- GET_SCRATCH0(r13)
+ mr r9,r12; \
#else
#define SYSCALL_KVMTEST \
- HMT_MEDIUM
+ HMT_MEDIUM; \
+ mr r9,r13; \
+ GET_PACA(r13);
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common)
-/* Syscall routine is used twice, in reloc-off and reloc-on paths */
-#define SYSCALL_PSERIES_1 \
+#define SYSCALL_FASTENDIAN_TEST \
BEGIN_FTR_SECTION \
cmpdi r0,0x1ebe ; \
beq- 1f ; \
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
- mr r9,r13 ; \
- GET_PACA(r13) ; \
- mfspr r11,SPRN_SRR0 ; \
-0:
-#define SYSCALL_PSERIES_2_RFID \
+/*
+ * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
+ * and HMT_MEDIUM.
+ */
+#define SYSCALL_REAL \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
LOAD_SYSCALL_HANDLER(r10) ; \
mtspr SPRN_SRR0,r10 ; \
@@ -869,11 +884,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
rfid ; \
b . ; /* prevent speculative execution */
-#define SYSCALL_PSERIES_3 \
+#define SYSCALL_FASTENDIAN \
/* Fast LE/BE switch system call */ \
1: mfspr r12,SPRN_SRR1 ; \
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
+ mr r13,r9 ; \
rfid ; /* return to userspace */ \
b . ; /* prevent speculative execution */
@@ -882,16 +898,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
* We can't branch directly so we do it via the CTR which
* is volatile across system calls.
*/
-#define SYSCALL_PSERIES_2_DIRECT \
- LOAD_SYSCALL_HANDLER(r12) ; \
- mtctr r12 ; \
+#define SYSCALL_VIRT \
+ LOAD_SYSCALL_HANDLER(r10) ; \
+ mtctr r10 ; \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; \
bctr ;
#else
/* We can branch directly */
-#define SYSCALL_PSERIES_2_DIRECT \
+#define SYSCALL_VIRT \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; /* Set RI (EE=0) */ \
@@ -899,20 +917,43 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#endif
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
- SYSCALL_KVMTEST
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_RFID
- SYSCALL_PSERIES_3
+ SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
+ SYSCALL_FASTENDIAN_TEST
+ SYSCALL_REAL
+ SYSCALL_FASTENDIAN
EXC_REAL_END(system_call, 0xc00, 0x100)
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
- SYSCALL_KVMTEST
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_DIRECT
- SYSCALL_PSERIES_3
+ SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
+ SYSCALL_FASTENDIAN_TEST
+ SYSCALL_VIRT
+ SYSCALL_FASTENDIAN
EXC_VIRT_END(system_call, 0x4c00, 0x100)
-TRAMP_KVM(PACA_EXGEN, 0xc00)
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ /*
+ * This is an hcall, so the register convention is as above, with these
+ * differences:
+ * r13 = PACA
+ * r12 = orig r13
+ * ctr = orig r10
+ */
+TRAMP_KVM_BEGIN(do_kvm_0xc00)
+ /*
+ * Save the PPR (on systems that support it) before changing to
+ * HMT_MEDIUM. That allows the KVM code to save that value into the
+ * guest state (it is the guest's PPR value).
+ */
+ OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR)
+ mfctr r10
+ SET_SCRATCH0(r12)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ mfcr r9
+ std r10,PACA_EXGEN+EX_R10(r13)
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+#endif
EXC_REAL(single_step, 0xd00, 0x100)
@@ -1553,6 +1594,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_bad_stack
b 1b
+_ASM_NOKPROBE_SYMBOL(bad_stack);
+
+/*
+ * When doorbell is triggered from system reset wakeup, the message is
+ * not cleared, so it would fire again when EE is enabled.
+ *
+ * When coming from local_irq_enable, there may be the same problem if
+ * we were hard disabled.
+ *
+ * Execute msgclr to clear the pending doorbell message before handling it.
+ */
+h_doorbell_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLR(3)
+ b h_doorbell_common
+
+doorbell_super_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLRP(3)
+ b doorbell_super_common
/*
* Called from arch_local_irq_enable when an interrupt needs
@@ -1563,6 +1624,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
* Note: While MSR:EE is off, we need to make sure that _MSR
* in the generated frame has EE set to 1 or the exception
* handler will not properly re-enable interrupts.
+ *
+ * Note that we don't specify LR as the NIP (return address) for
+ * the interrupt because that would unbalance the return branch
+ * predictor.
*/
_GLOBAL(__replay_interrupt)
/* We are going to jump to the exception common code which
@@ -1570,7 +1635,7 @@ _GLOBAL(__replay_interrupt)
* we don't give a damn about, so we don't bother storing them.
*/
mfmsr r12
- mflr r11
+ LOAD_REG_ADDR(r11, 1f)
mfcr r9
ori r12,r12,MSR_EE
cmpwi r3,0x900
@@ -1579,13 +1644,16 @@ _GLOBAL(__replay_interrupt)
beq hardware_interrupt_common
BEGIN_FTR_SECTION
cmpwi r3,0xe80
- beq h_doorbell_common
+ beq h_doorbell_common_msgclr
cmpwi r3,0xea0
beq h_virt_irq_common
cmpwi r3,0xe60
beq hmi_exception_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
- beq doorbell_super_common
+ beq doorbell_super_common_msgclr
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+1:
blr
+
+_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 466569e26278..3079518f2245 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -113,11 +113,55 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
return 1;
}
+/*
+ * If fadump is registered, check if the memory provided
+ * falls within the boot memory area.
+ */
+int is_fadump_boot_memory_area(u64 addr, ulong size)
+{
+ if (!fw_dump.dump_registered)
+ return 0;
+
+ return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+}
+
int is_fadump_active(void)
{
return fw_dump.dump_active;
}
+/*
+ * Returns 1 if there are no holes in the boot memory area,
+ * 0 otherwise.
+ */
+static int is_boot_memory_area_contiguous(void)
+{
+ struct memblock_region *reg;
+ unsigned long tstart, tend;
+ unsigned long start_pfn = PHYS_PFN(RMA_START);
+ unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
+ unsigned int ret = 0;
+
+ for_each_memblock(memory, reg) {
+ tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+ tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ if (tstart < tend) {
+ /* Memory hole from start_pfn to tstart */
+ if (tstart > start_pfn)
+ break;
+
+ if (tend == end_pfn) {
+ ret = 1;
+ break;
+ }
+
+ start_pfn = tend + 1;
+ }
+ }
+
+ return ret;
+}
+
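
The same interval logic as is_boot_memory_area_contiguous(), sketched over a plain array so the hole test is easier to follow; illustrative only, since the real code walks memblock in pfn units:

    struct range { unsigned long base, end; };      /* [base, end) */

    /* Returns 1 if regs[] cover [start, end) with no hole. */
    static int covers_without_holes(const struct range *regs, int n,
                                    unsigned long start, unsigned long end)
    {
            int i;

            for (i = 0; i < n; i++) {
                    unsigned long ts = max(regs[i].base, start);
                    unsigned long te = min(regs[i].end, end);

                    if (ts >= te)
                            continue;       /* region outside the window */
                    if (ts > start)
                            return 0;       /* hole before this region */
                    if (te == end)
                            return 1;       /* window fully covered */
                    start = te;             /* resume after this region */
            }
            return 0;
    }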
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
@@ -212,20 +256,46 @@ static inline unsigned long fadump_calculate_reserve_size(void)
int ret;
unsigned long long base, size;
+ if (fw_dump.reserve_bootvar)
+ pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
+
/*
* Check if the size is specified through crashkernel= cmdline
- * option. If yes, then use that but ignore base as fadump
- * reserves memory at end of RAM.
+ * option. If yes, then use that but ignore base as fadump reserves
+ * memory at a predefined offset.
*/
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&size, &base);
if (ret == 0 && size > 0) {
+ unsigned long max_size;
+
+ if (fw_dump.reserve_bootvar)
+ pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
+
fw_dump.reserve_bootvar = (unsigned long)size;
+
+ /*
+ * Adjust if the boot memory size specified is above
+ * the upper limit.
+ */
+ max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+ if (fw_dump.reserve_bootvar > max_size) {
+ fw_dump.reserve_bootvar = max_size;
+ pr_info("Adjusted boot memory size to %luMB\n",
+ (fw_dump.reserve_bootvar >> 20));
+ }
+
+ return fw_dump.reserve_bootvar;
+ } else if (fw_dump.reserve_bootvar) {
+ /*
+ * 'fadump_reserve_mem=' is being used to reserve memory
+ * for firmware-assisted dump.
+ */
return fw_dump.reserve_bootvar;
}
/* divide by 20 to get 5% of value */
- size = memblock_end_of_DRAM() / 20;
+ size = memblock_phys_mem_size() / 20;
/* round it down to a multiple of 256MB */
size = size & ~0x0FFFFFFFUL;
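
To make the default concrete, a worked example assuming 7GB of physical memory (0x0FFFFFFF is 256MB - 1, so the mask rounds down to a 256MB boundary):

    /* size  = 7516192768 / 20  = 375809638   (~358MB, 0x16666666)
     * size &= ~0x0FFFFFFFUL   -> 0x10000000  = 268435456 (256MB)
     * i.e. roughly 5% of RAM, rounded down to a 256MB multiple. */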
@@ -377,9 +447,22 @@ static int __init early_fadump_param(char *p)
}
early_param("fadump", early_fadump_param);
-static void register_fw_dump(struct fadump_mem_struct *fdm)
+/*
+ * Look for fadump_reserve_mem= cmdline option
+ * TODO: Remove references to the 'fadump_reserve_mem=' parameter
+ * once users have become accustomed to the 'crashkernel=' parameter.
+ */
+static int __init early_fadump_reserve_mem(char *p)
+{
+ if (p)
+ fw_dump.reserve_bootvar = memparse(p, &p);
+ return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+static int register_fw_dump(struct fadump_mem_struct *fdm)
{
- int rc;
+ int rc, err;
unsigned int wait_time;
pr_debug("Registering for firmware-assisted kernel dump...\n");
@@ -396,26 +479,38 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
} while (wait_time);
+ err = -EIO;
switch (rc) {
+ default:
+ pr_err("Failed to register. Unknown Error(%d).\n", rc);
+ break;
case -1:
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Hardware Error(%d).\n", rc);
break;
case -3:
+ if (!is_boot_memory_area_contiguous())
+ pr_err("Can't have holes in boot memory area while "
+ "registering fadump\n");
+
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Parameter Error(%d).\n", rc);
+ err = -EINVAL;
break;
case -9:
printk(KERN_ERR "firmware-assisted kernel dump is already "
" registered.");
fw_dump.dump_registered = 1;
+ err = -EEXIST;
break;
case 0:
printk(KERN_INFO "firmware-assisted kernel dump registration"
" is successful\n");
fw_dump.dump_registered = 1;
+ err = 0;
break;
}
+ return err;
}
void crash_fadump(struct pt_regs *regs, const char *str)
@@ -831,8 +926,19 @@ static void fadump_setup_crash_memory_ranges(void)
for_each_memblock(memory, reg) {
start = (unsigned long long)reg->base;
end = start + (unsigned long long)reg->size;
- if (start == RMA_START && end >= fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+
+ /*
+ * skip the first memory chunk that is already added (RMA_START
+ * through boot_memory_size). This logic needs a relook if and
+ * when RMA_START changes to a non-zero value.
+ */
+ BUILD_BUG_ON(RMA_START != 0);
+ if (start < fw_dump.boot_memory_size) {
+ if (end > fw_dump.boot_memory_size)
+ start = fw_dump.boot_memory_size;
+ else
+ continue;
+ }
/* add this range excluding the reserved dump area. */
fadump_exclude_reserved_area(start, end);
@@ -956,7 +1062,7 @@ static unsigned long init_fadump_header(unsigned long addr)
return addr;
}
-static void register_fadump(void)
+static int register_fadump(void)
{
unsigned long addr;
void *vaddr;
@@ -966,7 +1072,7 @@ static void register_fadump(void)
* assisted dump.
*/
if (!fw_dump.reserve_dump_area_size)
- return;
+ return -ENODEV;
fadump_setup_crash_memory_ranges();
@@ -979,7 +1085,7 @@ static void register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- register_fw_dump(&fdm);
+ return register_fw_dump(&fdm);
}
static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
@@ -1046,28 +1152,71 @@ void fadump_cleanup(void)
}
}
+static void fadump_free_reserved_memory(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long pfn;
+ unsigned long time_limit = jiffies + HZ;
+
+ pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
+ PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ free_reserved_page(pfn_to_page(pfn));
+
+ if (time_after(jiffies, time_limit)) {
+ cond_resched();
+ time_limit = jiffies + HZ;
+ }
+ }
+}
+
+/*
+ * Skip memory holes and free memory that was actually reserved.
+ */
+static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+{
+ struct memblock_region *reg;
+ unsigned long tstart, tend;
+ unsigned long start_pfn = PHYS_PFN(start);
+ unsigned long end_pfn = PHYS_PFN(end);
+
+ for_each_memblock(memory, reg) {
+ tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+ tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ if (tstart < tend) {
+ fadump_free_reserved_memory(tstart, tend);
+
+ if (tend == end_pfn)
+ break;
+
+ start_pfn = tend + 1;
+ }
+ }
+}
+
/*
* Release the memory that was reserved in early boot to preserve the memory
* contents. The released memory will be available for general use.
*/
static void fadump_release_memory(unsigned long begin, unsigned long end)
{
- unsigned long addr;
unsigned long ra_start, ra_end;
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- /*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
- */
- if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
- continue;
-
- free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
- }
+ /*
+ * exclude the dump reserve area. Will reuse it for next
+ * fadump registration.
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
}
static void fadump_invalidate_release_mem(void)
@@ -1161,7 +1310,6 @@ static ssize_t fadump_register_store(struct kobject *kobj,
switch (buf[0]) {
case '0':
if (fw_dump.dump_registered == 0) {
- ret = -EINVAL;
goto unlock_out;
}
/* Un-register Firmware-assisted dump */
@@ -1169,11 +1317,11 @@ static ssize_t fadump_register_store(struct kobject *kobj,
break;
case '1':
if (fw_dump.dump_registered == 1) {
- ret = -EINVAL;
+ ret = -EEXIST;
goto unlock_out;
}
/* Register Firmware-assisted dump */
- register_fadump();
+ ret = register_fadump();
break;
default:
ret = -EINVAL;
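As an aside on the fadump_release_memory() hunk above: the overlap test splits the released range around the reserved dump area. A minimal standalone sketch of that interval logic — plain userspace C, the helper names are made up, none of this is kernel API:

    #include <stdio.h>

    /* model of fadump_release_memory(): free [begin, end) while
     * skipping the reserved dump area [ra_start, ra_end) */
    static void release(unsigned long s, unsigned long e)
    {
        printf("release [0x%lx, 0x%lx)\n", s, e);
    }

    static void release_memory(unsigned long begin, unsigned long end,
                               unsigned long ra_start, unsigned long ra_end)
    {
        if (begin < ra_end && end > ra_start) {
            /* overlap: release the parts on either side, if any */
            if (begin < ra_start)
                release(begin, ra_start);
            if (end > ra_end)
                release(ra_end, end);
        } else {
            release(begin, end);   /* no overlap: release it all */
        }
    }

    int main(void)
    {
        release_memory(0x0, 0x40000000, 0x10000000, 0x20000000);
        return 0;
    }

With these sample values it prints the two ranges on either side of the reserved area, which is what the patch does region by region.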
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 4898d676dcae..5adb390e773b 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -31,6 +31,7 @@
* registers for winkle support.
*/
#define _SDR1 GPR3
+#define _PTCR GPR3
#define _RPR GPR4
#define _SPURR GPR5
#define _PURR GPR6
@@ -39,7 +40,7 @@
#define _AMOR GPR9
#define _WORT GPR10
#define _WORC GPR11
-#define _PTCR GPR12
+#define _LPCR GPR12
#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
@@ -55,12 +56,14 @@ save_sprs_to_stack:
* here since any thread in the core might wake up first
*/
BEGIN_FTR_SECTION
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
/*
* Note - SDR1 is dropped in Power ISA v3. Hence not restoring
* SDR1 here
*/
+ mfspr r3,SPRN_PTCR
+ std r3,_PTCR(r1)
+ mfspr r3,SPRN_LPCR
+ std r3,_LPCR(r1)
FTR_SECTION_ELSE
mfspr r3,SPRN_SDR1
std r3,_SDR1(r1)
@@ -106,13 +109,9 @@ core_idle_lock_held:
/*
* Pass requested state in r3:
* r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested STOP state in POWER9
- *
- * To check IRQ_HAPPENED in r4
- * 0 - don't check
- * 1 - check
+ * - Requested PSSCR value in POWER9
*
- * Address to 'rfid' to in r5
+ * Address of idle handler to branch to in realmode in r4
*/
pnv_powersave_common:
/* Use r3 to pass state nap/sleep/winkle */
@@ -122,37 +121,14 @@ pnv_powersave_common:
* need to save PC, some CR bits and the NV GPRs,
* but for now an interrupt frame will do.
*/
+ mtctr r4
+
mflr r0
std r0,16(r1)
stdu r1,-INT_FRAME_SIZE(r1)
std r0,_LINK(r1)
std r0,_NIP(r1)
- /* Hard disable interrupts */
- mfmsr r9
- rldicl r9,r9,48,1
- rotldi r9,r9,16
- mtmsrd r9,1 /* hard-disable interrupts */
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- andi. r0,r0,~PACA_IRQ_HARD_DIS@l
- beq 1f
- cmpwi cr0,r4,0
- beq 1f
- addi r1,r1,INT_FRAME_SIZE
- ld r0,16(r1)
- li r3,0 /* Return 0 (no nap) */
- mtlr r0
- blr
-
-1: /* We mark irqs hard disabled as this is the state we'll
- * be in when returning and we need to tell arch_local_irq_restore()
- * about it
- */
- li r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
-
/* We haven't lost state ... yet */
li r0,0
stb r0,PACA_NAPSTATELOST(r13)
@@ -160,9 +136,8 @@ pnv_powersave_common:
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
- mfcr r4
- std r4,_CCR(r1)
- std r9,_MSR(r1)
+ mfcr r5
+ std r5,_CCR(r1)
std r1,PACAR1(r13)
/*
@@ -172,12 +147,8 @@ pnv_powersave_common:
* the MMU context to the guest.
*/
LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- li r6, MSR_RI
- andc r6, r9, r6
- mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
- mtspr SPRN_SRR0, r5
- mtspr SPRN_SRR1, r7
- rfid
+ mtmsrd r7,0
+ bctr
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
@@ -285,6 +256,19 @@ power_enter_stop:
bne .Lhandle_esl_ec_set
IDLE_STATE_ENTER_SEQ(PPC_STOP)
li r3,0 /* Since we didn't lose state, return 0 */
+
+ /*
+ * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
+ * it can determine if the wakeup reason is an HMI in
+ * CHECK_HMI_INTERRUPT.
+ *
+ * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
+ * reason, so there is no point setting r12 to SRR1.
+ *
+	 * Further, we clear r12 here, so that we don't accidentally enter the
+	 * HMI handling in pnv_wakeup_noloss() if r12[42:45] == WAKE_HMI.
+ */
+ li r12, 0
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
@@ -319,45 +303,23 @@ lwarx_loop_stop:
IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
-_GLOBAL(power7_idle)
+/*
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
+ */
+_GLOBAL(power7_idle_insn)
/* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- li r3, 1
- /* fall through */
-
-_GLOBAL(power7_nap)
- mr r4,r3
- li r3,PNV_THREAD_NAP
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_sleep)
- li r3,PNV_THREAD_SLEEP
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_winkle)
- li r3,PNV_THREAD_WINKLE
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
b pnv_powersave_common
- /* No return */
#define CHECK_HMI_INTERRUPT \
- mfspr r0,SPRN_SRR1; \
BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
+ rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
+ rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne 20f; \
+ bne+ 20f; \
/* Invoke opal call to handle hmi */ \
ld r2,PACATOC(r13); \
ld r1,PACAR1(r13); \
@@ -369,16 +331,13 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
20: nop;
/*
- * r3 - The PSSCR value corresponding to the stop state.
- * r4 - The PSSCR mask corrresonding to the stop state.
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired PSSCR register value.
*/
_GLOBAL(power9_idle_stop)
- mfspr r5,SPRN_PSSCR
- andc r5,r5,r4
- or r3,r3,r5
+ std r3, PACA_REQ_PSSCR(r13)
mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r5,power_enter_stop)
- li r4,1
+ LOAD_REG_ADDR(r4,power_enter_stop)
b pnv_powersave_common
/* No return */
@@ -436,17 +395,17 @@ pnv_powersave_wakeup_mce:
/*
* Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into SRR1, which allows reuse of the system reset wakeup
+ * reason into r12, which allows reuse of the system reset wakeup
* code without being mistaken for another type of wakeup.
*/
- oris r3,r3,SRR1_WAKEMCE_RESVD@h
- mtspr SPRN_SRR1,r3
+ oris r12,r3,SRR1_WAKEMCE_RESVD@h
b pnv_powersave_wakeup
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ * r12 - SRR1
*/
.global pnv_powersave_wakeup
pnv_powersave_wakeup:
@@ -464,6 +423,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
li r0,PNV_THREAD_RUNNING
stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+ mr r3,r12
+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
@@ -477,7 +438,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
#endif
/* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
blt cr3,pnv_wakeup_noloss
b pnv_wakeup_loss
@@ -489,18 +449,35 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
*/
pnv_restore_hyp_resource_arch300:
/*
+ * Workaround for POWER9, if we lost resources, the ERAT
+ * might have been mixed up and needs flushing.
+ */
+ blt cr3,1f
+ PPC_INVALIDATE_ERAT
+1:
+ /*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
*/
LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
- mfspr r5,SPRN_PSSCR
+BEGIN_FTR_SECTION_NESTED(71)
+ /*
+ * Assume that we are waking up from the state
+ * same as the Requested Level (RL) in the PSSCR
+ * which are Bits 60-63
+ */
+ ld r5,PACA_REQ_PSSCR(r13)
+ rldicl r5,r5,0,60
+FTR_SECTION_ELSE_NESTED(71)
/*
* 0-3 bits correspond to Power-Saving Level Status
* which indicates the idle state we are waking up from
*/
+ mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
+ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
cmpd cr4,r5,r4
bge cr4,pnv_wakeup_tb_loss /* returns to caller */
@@ -567,9 +544,9 @@ pnv_wakeup_tb_loss:
* is required to return back to reset vector after hypervisor state
* restore is complete.
*/
+ mr r19,r12
mr r18,r4
mflr r17
- mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
@@ -731,13 +708,14 @@ timebase_resync:
 * Use cr3 which indicates that we are waking up with at least partial
* hypervisor state loss to determine if TIMEBASE RESYNC is needed.
*/
- ble cr3,clear_lock
+ ble cr3,.Ltb_resynced
/* Time base re-sync */
bl opal_resync_timebase;
/*
- * If waking up from sleep, per core state is not lost, skip to
- * clear_lock.
+ * If waking up from sleep (POWER8), per core state
+ * is not lost, skip to clear_lock.
*/
+.Ltb_resynced:
blt cr4,clear_lock
/*
@@ -812,9 +790,13 @@ no_segments:
mtctr r12
bctrl
+BEGIN_FTR_SECTION
+ ld r4,_LPCR(r1)
+ mtspr SPRN_LPCR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:
- mtspr SPRN_SRR1,r16
+ mr r12,r19
mtlr r17
blr /* return to pnv_powersave_wakeup */
@@ -827,6 +809,7 @@ fastsleep_workaround_at_exit:
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
+ * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
*/
.global pnv_wakeup_loss
pnv_wakeup_loss:
@@ -836,32 +819,33 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
REST_NVGPRS(r1)
REST_GPR(2, r1)
+ ld r4,PACAKMSR(r13)
+ ld r5,_LINK(r1)
ld r6,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
+ * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
*/
pnv_wakeup_noloss:
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
bne pnv_wakeup_loss
+ ld r1,PACAR1(r13)
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r1,PACAR1(r13)
- ld r6,_CCR(r1)
- ld r4,_MSR(r1)
+ ld r4,PACAKMSR(r13)
ld r5,_NIP(r1)
+ ld r6,_CCR(r1)
addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c291df30fe3..0bcec745a672 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -322,7 +322,8 @@ bool prep_irq_for_idle(void)
* First we need to hard disable to ensure no interrupt
* occurs before we effectively enter the low power state
*/
- hard_irq_disable();
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
/*
* If anything happened while we were soft-disabled,
@@ -347,6 +348,65 @@ bool prep_irq_for_idle(void)
return true;
}
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * This is for idle sequences that return with IRQs off, but the
+ * idle state itself wakes on interrupt. Tell the irq tracer that
+ * IRQs are enabled for the duration of idle so it does not record
+ * long IRQs-off times. Must be paired with fini_irq_for_idle_irqsoff.
+ */
+bool prep_irq_for_idle_irqsoff(void)
+{
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ return true;
+}
+
+/*
+ * Take the SRR1 wakeup reason, index into this table to find the
+ * appropriate irq_happened bit.
+ */
+static const u8 srr1_to_lazyirq[0x10] = {
+ 0, 0, 0,
+ PACA_IRQ_DBELL,
+ 0,
+ PACA_IRQ_DBELL,
+ PACA_IRQ_DEC,
+ 0,
+ PACA_IRQ_EE,
+ PACA_IRQ_EE,
+ PACA_IRQ_HMI,
+ 0, 0, 0, 0, 0 };
+
+void irq_set_pending_from_srr1(unsigned long srr1)
+{
+ unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+
+ /*
+ * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
+ * so this can be called unconditionally with srr1 wake reason.
+ */
+ local_paca->irq_happened |= srr1_to_lazyirq[idx];
+}
+#endif /* CONFIG_PPC_BOOK3S */
+
/*
* Force a replay of the external interrupt handler on this CPU.
*/
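The srr1_to_lazyirq[] lookup added above is easy to model standalone. This sketch hardcodes the SRR1[42:45] field (mask 0x003c0000, shift 18, matching SRR1_WAKEMASK_P8 in the patch) and uses illustrative PACA_IRQ_* values, since the real ones live in asm/hw_irq.h:

    #include <stdio.h>

    #define PACA_IRQ_DBELL 0x02   /* illustrative values only */
    #define PACA_IRQ_EE    0x04
    #define PACA_IRQ_DEC   0x08
    #define PACA_IRQ_HMI   0x20

    static const unsigned char srr1_to_lazyirq[0x10] = {
        0, 0, 0,
        PACA_IRQ_DBELL,
        0,
        PACA_IRQ_DBELL,
        PACA_IRQ_DEC,
        0,
        PACA_IRQ_EE,
        PACA_IRQ_EE,
        PACA_IRQ_HMI,
        0, 0, 0, 0, 0 };

    int main(void)
    {
        /* a decrementer wakeup: SRR1[42:45] = 0b0110 */
        unsigned long long srr1 = 0x180000ULL;
        unsigned int idx = (srr1 & 0x003c0000ULL) >> 18;

        printf("idx=%u happened=0x%x\n", idx, srr1_to_lazyirq[idx]);
        return 0;
    }

Index 6 yields PACA_IRQ_DEC, i.e. the wakeup is accounted as a pending decrementer interrupt.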
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01addfb0ed0a..45f1ff721c32 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -164,17 +164,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
void arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ patch_instruction(p->addr, BREAKPOINT_INSTRUCTION);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
void arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ patch_instruction(p->addr, p->opcode);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a9bfa49f3698..e0e131e662ed 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -268,6 +268,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
static const char *mc_ra_types[] = {
"Indeterminate",
"Instruction fetch (bad)",
+ "Instruction fetch (foreign)",
"Page table walk ifetch (bad)",
"Page table walk ifetch (foreign)",
"Load (bad)",
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index f913139bb0c2..d24e689e893f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -236,6 +236,9 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000180000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008000000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
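The new table row above follows the same (mask, value) matching scheme as the rest of mce_p9_ierror_table: the first entry whose mask-selected SRR1 bits equal its value classifies the error. A simplified standalone model (struct layout and names are ours, not the kernel's):

    #include <stdio.h>

    struct ierror_row {
        unsigned long long srr1_mask;
        unsigned long long srr1_value;
        const char *type;
    };

    static const struct ierror_row table[] = {
        { 0x081c0000ULL, 0x00180000ULL, "UE page table walk ifetch" },
        { 0x081c0000ULL, 0x001c0000ULL, "RA ifetch (foreign)" },
        { 0x081c0000ULL, 0x08000000ULL, "LINK ifetch timeout" },
        { 0, 0, NULL }
    };

    int main(void)
    {
        unsigned long long srr1 = 0x001c0000ULL;
        const struct ierror_row *r;

        for (r = table; r->srr1_mask; r++)
            if ((srr1 & r->srr1_mask) == r->srr1_value) {
                printf("matched: %s\n", r->type);
                break;
            }
        return 0;
    }

Here the sample SRR1 lands on the newly added foreign-ifetch row rather than the page-table-walk row above it.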
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 84db14e435f5..3f7a9a2d2435 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -244,8 +244,7 @@ _GLOBAL(_nmask_and_or_msr)
*/
_GLOBAL(real_readb)
mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
+ rlwinm r0,r7,0,~MSR_DR
sync
mtmsr r0
sync
@@ -262,8 +261,7 @@ _GLOBAL(real_readb)
*/
_GLOBAL(real_writeb)
mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
+ rlwinm r0,r7,0,~MSR_DR
sync
mtmsr r0
sync
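The rewrite above is behavior-preserving: ori sets MSR_DR and xori flips it back off, a roundabout way of clearing the bit, while rlwinm with an inverted mask clears it directly. A one-shot check of the identity (MSR_DR is bit 4 on powerpc, though the identity holds for any single-bit mask):

    #include <assert.h>

    #define MSR_DR (1U << 4)

    int main(void)
    {
        unsigned long long msr;

        for (msr = 0; msr <= 0xffffffffULL; msr += 0x10001) {
            unsigned int a = ((unsigned int)msr | MSR_DR) ^ MSR_DR; /* ori; xori */
            unsigned int b = (unsigned int)msr & ~MSR_DR;           /* rlwinm   */
            assert(a == b);
        }
        return 0;
    }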
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ec60ed0d4aad..6f8273f5e988 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -158,12 +158,13 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
{
/* addis r4,0,(insn)@h */
- *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) |
- ((val >> 16) & 0xffff);
+ patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) |
+ ((val >> 16) & 0xffff));
+ addr++;
/* ori r4,r4,(insn)@l */
- *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) |
- (val & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) |
+ ___PPC_RS(4) | (val & 0xffff));
}
/*
@@ -173,24 +174,28 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
{
/* lis r3,(op)@highest */
- *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) |
- ((val >> 48) & 0xffff);
+ patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) |
+ ((val >> 48) & 0xffff));
+ addr++;
/* ori r3,r3,(op)@higher */
- *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
- ((val >> 32) & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
+ ___PPC_RS(3) | ((val >> 32) & 0xffff));
+ addr++;
/* rldicr r3,r3,32,31 */
- *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) |
- __PPC_SH64(32) | __PPC_ME64(31);
+ patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) |
+ ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31));
+ addr++;
/* oris r3,r3,(op)@h */
- *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) |
- ((val >> 16) & 0xffff);
+ patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) |
+ ___PPC_RS(3) | ((val >> 16) & 0xffff));
+ addr++;
/* ori r3,r3,(op)@l */
- *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
- (val & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
+ ___PPC_RS(3) | (val & 0xffff));
}
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
@@ -198,7 +203,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step;
kprobe_opcode_t *op_callback_addr, *emulate_step_addr;
long b_offset;
- unsigned long nip;
+ unsigned long nip, size;
+ int rc, i;
kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
@@ -231,8 +237,14 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
goto error;
/* Setup template */
- memcpy(buff, optprobe_template_entry,
- TMPL_END_IDX * sizeof(kprobe_opcode_t));
+ /* We can optimize this via patch_instruction_window later */
+ size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
+ pr_devel("Copying template to %p, size %lu\n", buff, size);
+ for (i = 0; i < size; i++) {
+ rc = patch_instruction(buff + i, *(optprobe_template_entry + i));
+ if (rc < 0)
+ goto error;
+ }
/*
* Fixup the template with instructions to:
@@ -261,8 +273,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
if (!branch_op_callback || !branch_emulate_step)
goto error;
- buff[TMPL_CALL_HDLR_IDX] = branch_op_callback;
- buff[TMPL_EMULATE_IDX] = branch_emulate_step;
+ patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+ patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
/*
* 3. load instruction to be emulated into relevant register, and
@@ -272,8 +284,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 4. branch back from trampoline
*/
- buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX,
- (unsigned long)nip, 0);
+ patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0);
flush_icache_range((unsigned long)buff,
(unsigned long)(&buff[TMPL_END_IDX]));
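The five patched instructions in patch_imm64_load_insns() assemble a 64-bit constant 16 bits at a time; any sign extension from lis is irrelevant because the rldicr discards the upper half. A quick standalone check of the arithmetic — this models the register value after each step, nothing here is kernel API:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t val = 0x1234567890abcdefULL;
        uint64_t r3;

        r3  = ((val >> 48) & 0xffff) << 16;   /* lis    r3,(op)@highest */
        r3 |= (val >> 32) & 0xffff;           /* ori    r3,r3,(op)@higher */
        r3 <<= 32;                            /* rldicr r3,r3,32,31 */
        r3 |= ((val >> 16) & 0xffff) << 16;   /* oris   r3,r3,(op)@h */
        r3 |= val & 0xffff;                   /* ori    r3,r3,(op)@l */

        assert(r3 == val);
        return 0;
    }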
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 2ad725ef4368..9f3e2c932dcc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -511,6 +511,10 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
+ /*
+ * Syscall exit makes a similar initial check before branching
+	 * to restore_math. Keep them in sync.
+ */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -1133,6 +1137,11 @@ static inline void restore_sprs(struct thread_struct *old_thread,
#endif
}
+#ifdef CONFIG_PPC_BOOK3S_64
+#define CP_SIZE 128
+static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
+#endif
+
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1195,12 +1204,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
__switch_to_tm(prev, new);
- /*
- * We can't take a PMU exception inside _switch() since there is a
- * window where the kernel stack SLB and the kernel stack are out
- * of sync. Hard disable here.
- */
- hard_irq_disable();
+ if (!radix_enabled()) {
+ /*
+ * We can't take a PMU exception inside _switch() since there
+ * is a window where the kernel stack SLB and the kernel stack
+ * are out of sync. Hard disable here.
+ */
+ hard_irq_disable();
+ }
/*
* Call restore_sprs() before calling _switch(). If we move it after
@@ -1220,8 +1231,28 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch->active = 1;
}
- if (current_thread_info()->task->thread.regs)
+ if (current_thread_info()->task->thread.regs) {
restore_math(current_thread_info()->task->thread.regs);
+
+ /*
+ * The copy-paste buffer can only store into foreign real
+ * addresses, so unprivileged processes can not see the
+ * data or use it in any way unless they have foreign real
+ * mappings. We don't have a VAS driver that allocates those
+ * yet, so no cpabort is required.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ /*
+ * DD1 allows paste into normal system memory, so we
+ * do an unpaired copy here to clear the buffer and
+ * prevent a covert channel being set up.
+ *
+ * cpabort is not used because it is quite expensive.
+ */
+ asm volatile(PPC_COPY(%0, %1)
+ : : "r"(dummy_copy_buffer), "r"(0));
+ }
+ }
#endif /* CONFIG_PPC_STD_MMU_64 */
return last;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 857129acf960..94a948207cd2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -335,6 +335,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
maj = ((pvr >> 8) & 0xFF) - 1;
min = pvr & 0xFF;
break;
+ case 0x004e: /* POWER9 bits 12-15 give chip type */
+ maj = (pvr >> 8) & 0x0F;
+ min = pvr & 0xFF;
+ break;
default:
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
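The new PVR case above reports the POWER9 chip revision from bits 12-15 of the PVR instead of the full-byte-minus-one scheme used by the case before it. Worked example with a plausible (here made-up) PVR value:

    #include <stdio.h>

    int main(void)
    {
        unsigned int pvr = 0x004e1202;          /* sample POWER9 PVR */
        unsigned int maj = (pvr >> 8) & 0x0F;   /* chip type, bits 12-15 */
        unsigned int min = pvr & 0xFF;

        printf("revision %u.%u\n", maj, min);   /* prints "revision 2.2" */
        return 0;
    }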
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1069f74fca47..c6b8bace1766 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -33,6 +33,7 @@
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>
+#include <linux/processor.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -112,7 +113,8 @@ int smp_generic_cpu_bootable(unsigned int nr)
#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
/*
* The processor is currently spinning, waiting for the
@@ -766,8 +768,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
smp_ops->give_timebase();
/* Wait until cpu puts itself in the online & active maps */
- while (!cpu_online(cpu))
- cpu_relax();
+ spin_until_cond(cpu_online(cpu));
return 0;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2b33cfaac7b8..fe6f3a285455 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -59,10 +59,10 @@
#include <linux/suspend.h>
#include <linux/rtc.h>
#include <linux/sched/cputime.h>
+#include <linux/processor.h>
#include <asm/trace.h>
#include <asm/io.h>
-#include <asm/processor.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
@@ -442,6 +442,7 @@ void __delay(unsigned long loops)
unsigned long start;
int diff;
+ spin_begin();
if (__USE_RTC()) {
start = get_rtcl();
do {
@@ -449,13 +450,14 @@ void __delay(unsigned long loops)
diff = get_rtcl() - start;
if (diff < 0)
diff += 1000000000;
+ spin_cpu_relax();
} while (diff < loops);
} else {
start = get_tbl();
while (get_tbl() - start < loops)
- HMT_low();
- HMT_medium();
+ spin_cpu_relax();
}
+ spin_end();
}
EXPORT_SYMBOL(__delay);
@@ -675,7 +677,7 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
* the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
* are 64-bit unsigned numbers.
*/
-unsigned long long sched_clock(void)
+notrace unsigned long long sched_clock(void)
{
if (__USE_RTC())
return get_rtc();
@@ -739,12 +741,20 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ unsigned int tcr;
+
/* Clear any pending timer interrupts */
mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
- /* Enable decrementer interrupt */
- mtspr(SPRN_TCR, TCR_DIE);
-#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
+ tcr = mfspr(SPRN_TCR);
+ /*
+ * The watchdog may have already been enabled by u-boot. So leave
+	 * TCR[WP] (Watchdog Period) alone.
+ */
+ tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */
+ tcr |= TCR_DIE; /* Enable decrementer */
+ mtspr(SPRN_TCR, tcr);
+#endif
}
void __init generic_calibrate_decr(void)
@@ -823,38 +833,76 @@ void read_persistent_clock(struct timespec *ts)
}
/* clocksource code */
-static u64 rtc_read(struct clocksource *cs)
+static notrace u64 rtc_read(struct clocksource *cs)
{
return (u64)get_rtc();
}
-static u64 timebase_read(struct clocksource *cs)
+static notrace u64 timebase_read(struct clocksource *cs)
{
return (u64)get_tb();
}
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult, u64 cycle_last)
+
+void update_vsyscall(struct timekeeper *tk)
{
+ struct timespec xt;
+ struct clocksource *clock = tk->tkr_mono.clock;
+ u32 mult = tk->tkr_mono.mult;
+ u32 shift = tk->tkr_mono.shift;
+ u64 cycle_last = tk->tkr_mono.cycle_last;
u64 new_tb_to_xs, new_stamp_xsec;
- u32 frac_sec;
+ u64 frac_sec;
if (clock != &clocksource_timebase)
return;
+ xt.tv_sec = tk->xtime_sec;
+ xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_mb();
- /* 19342813113834067 ~= 2^(20+64) / 1e9 */
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
- new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
- do_div(new_stamp_xsec, 1000000000);
- new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
+ /*
+ * This computes ((2^20 / 1e9) * mult) >> shift as a
+ * 0.64 fixed-point fraction.
+ * The computation in the else clause below won't overflow
+ * (as long as the timebase frequency is >= 1.049 MHz)
+ * but loses precision because we lose the low bits of the constant
+ * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
+ * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
+ * over a second. (Shift values are usually 22, 23 or 24.)
+ * For high frequency clocks such as the 512MHz timebase clock
+ * on POWER[6789], the mult value is small (e.g. 32768000)
+ * and so we can shift the constant by 16 initially
+ * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
+ * remaining shifts after the multiplication, which gives a
+ * more accurate result (e.g. with mult = 32768000, shift = 24,
+ * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
+ */
+ if (mult <= 62500000 && clock->shift >= 16)
+ new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
+ else
+ new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
+
+ /*
+ * Compute the fractional second in units of 2^-32 seconds.
+ * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
+ * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
+ * it in units of 2^-32 seconds.
+ * We assume shift <= 32 because clocks_calc_mult_shift()
+ * generates shift values in the range 0 - 32.
+ */
+ frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
+ do_div(frac_sec, NSEC_PER_SEC);
- BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
- /* this is tv_nsec / 1e9 as a 0.32 fraction */
- frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
+ /*
+ * Work out new stamp_xsec value for any legacy users of systemcfg.
+ * stamp_xsec is in units of 2^-20 seconds.
+ */
+ new_stamp_xsec = frac_sec >> 12;
+ new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
/*
* tb_update_count is used to allow the userspace gettimeofday code
@@ -864,15 +912,13 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
* the two values of tb_update_count match and are even then the
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
- * We expect the caller to have done the first increment of
- * vdso_data->tb_update_count already.
*/
vdso_data->tb_orig_stamp = cycle_last;
vdso_data->stamp_xsec = new_stamp_xsec;
vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->stamp_xtime = *wall_time;
+ vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
+ vdso_data->stamp_xtime = xt;
vdso_data->stamp_sec_fraction = frac_sec;
smp_wmb();
++(vdso_data->tb_update_count);
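The precision claim in the long comment above can be verified numerically. For the 512 MHz timebase case (mult = 32768000, shift = 24), both expressions approximate mult * 2^84 / (1e9 * 2^shift); pre-shifting the constant by only 16 bits keeps more low bits and lands closer to the exact value. A standalone check, pure arithmetic:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t mult = 32768000, shift = 24;

        /* lower precision: shift the whole constant first */
        uint64_t a = mult * (19342813113834067ULL >> shift);

        /* higher precision: pre-shift by 16 only, shift the product
         * by the remainder (no overflow while mult <= 62500000) */
        uint64_t b = (mult * 295147905179ULL) >> (shift - 16);

        printf("a=%llu b=%llu b-a=%llu\n",
               (unsigned long long)a, (unsigned long long)b,
               (unsigned long long)(b - a));
        return 0;
    }

Here b - a comes to 19840000 out of roughly 3.8e16, i.e. about the 0.5e-9 relative error the comment cites for the shift-first form.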
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3a2d04134da9..c4ba37822ba0 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -313,8 +313,8 @@ dont_backup_fp:
blr
- /* void tm_recheckpoint(struct thread_struct *thread,
- * unsigned long orig_msr)
+ /* void __tm_recheckpoint(struct thread_struct *thread,
+ * unsigned long orig_msr)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
*
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d4e545d27ef9..bfcfd9ef09f2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -237,6 +237,7 @@ void die(const char *str, struct pt_regs *regs, long err)
err = 0;
oops_end(flags, regs, err);
}
+NOKPROBE_SYMBOL(die);
void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
@@ -1968,6 +1969,7 @@ void unrecoverable_exception(struct pt_regs *regs)
regs->trap, regs->nip);
die("Unrecoverable exception", regs, SIGABRT);
}
+NOKPROBE_SYMBOL(unrecoverable_exception);
#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
/*
@@ -1998,6 +2000,7 @@ void kernel_bad_stack(struct pt_regs *regs)
regs->gpr[1], regs->nip);
die("Bad kernel stack pointer", regs, SIGABRT);
}
+NOKPROBE_SYMBOL(kernel_bad_stack);
void __init trap_init(void)
{
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 2f793be3d2b1..b1a250560198 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,6 +8,12 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
+#ifdef CONFIG_STRICT_KERNEL_RWX
+#define STRICT_ALIGN_SIZE (1 << 24)
+#else
+#define STRICT_ALIGN_SIZE PAGE_SIZE
+#endif
+
ENTRY(_stext)
PHDRS {
@@ -58,7 +64,6 @@ SECTIONS
#ifdef CONFIG_PPC64
KEEP(*(.head.text.first_256B));
#ifdef CONFIG_PPC_BOOK3E
-# define END_FIXED 0x100
#else
KEEP(*(.head.text.real_vectors));
*(.head.text.real_trampolines);
@@ -66,12 +71,8 @@ SECTIONS
*(.head.text.virt_trampolines);
# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
KEEP(*(.head.data.fwnmi_page));
-# define END_FIXED 0x8000
-# else
-# define END_FIXED 0x7000
# endif
#endif
- ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error");
#else /* !CONFIG_PPC64 */
HEAD_TEXT
#endif
@@ -79,23 +80,6 @@ SECTIONS
__head_end = .;
- /*
- * If the build dies here, it's likely code in head_64.S is referencing
- * labels it can't reach, and the linker inserting stubs without the
- * assembler's knowledge. To debug, remove the above assert and
- * rebuild. Look for branch stubs in the fixed section region.
- *
- * Linker stub generation could be allowed in "trampoline"
- * sections if absolutely necessary, but this would require
- * some rework of the fixed sections. Before resorting to this,
- * consider references that have sufficient addressing range,
- * (e.g., hand coded trampolines) so the linker does not have
- * to add stubs.
- *
- * Linker stubs at the top of the main text section are currently not
- * detected, and will result in a crash at boot due to offsets being
- * wrong.
- */
#ifdef CONFIG_PPC64
/*
* BLOCK(0) overrides the default output section alignment because
@@ -103,18 +87,31 @@ SECTIONS
* section placement to work.
*/
.text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+ *(.linker_stub_catch);
+ . = . ;
+#endif
+
#else
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text .fixup __ftr_alt_* .ref.text)
+ *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ /*
+ * -Os builds call FP save/restore functions. The powerpc64
+ * linker generates those on demand in the .sfpr section.
+ * .sfpr gets placed at the beginning of a group of input
+ * sections, which can break start-of-text offset if it is
+ * included with the main text sections, so put it by itself.
+ */
+ *(.sfpr);
MEM_KEEP(init.text)
MEM_KEEP(exit.text)
@@ -132,7 +129,7 @@ SECTIONS
PROVIDE32 (etext = .);
/* Read-only data */
- RODATA
+ RO_DATA(PAGE_SIZE)
EXCEPTION_TABLE(0)
@@ -149,7 +146,7 @@ SECTIONS
/*
* Init sections discarded at runtime
*/
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(STRICT_ALIGN_SIZE);
__init_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE) :kernel
@@ -267,7 +264,9 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
*(.sdata)
+ *(.sdata2)
*(.got.plt) *(.got)
+ *(.plt)
}
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -330,6 +329,16 @@ SECTIONS
_end = . ;
PROVIDE32 (end = .);
- /* Sections to be discarded. */
+ STABS_DEBUG
+
+ DWARF_DEBUG
+
DISCARDS
+ /DISCARD/ : {
+ *(*.EMB.apuinfo)
+ *(.glink .iplt .plt .rela* .comment)
+ *(.gnu.version*)
+ *(.gnu.attributes)
+ *(.eh_frame)
+ }
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ce6f2121fffe..584c74c8119f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
@@ -443,17 +444,23 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
+ for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ trace_tlbie(kvm->arch.lpid, 0, rbvalues[i],
+ kvm->arch.lpid, 0, 0, 0);
+ }
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
+ for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (0));
+ trace_tlbie(kvm->arch.lpid, 1, rbvalues[i],
+ 0, 0, 0, 0);
+ }
asm volatile("ptesync" : : : "memory");
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ea4b53f4b16..cb44065e2946 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -313,15 +313,21 @@ kvm_novcpu_exit:
* We come in here when wakened from nap mode.
* Relocation is off and most register values are lost.
* r13 points to the PACA.
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
.globl kvm_start_guest
kvm_start_guest:
-
/* Set runlatch bit the minute you wake up from nap */
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
mtspr SPRN_CTRLT, r0
+ /*
+ * Could avoid this and pass it through in r3. For now,
+ * code expects it to be in SRR1.
+ */
+ mtspr SPRN_SRR1,r3
+
ld r2,PACATOC(r13)
li r0,KVM_HWTHREAD_IN_KVM
@@ -440,13 +446,15 @@ kvm_no_guest:
/*
* We jump to pnv_wakeup_loss, which will return to the caller
* of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap.
+ * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
+ * requires SRR1 in r12.
*/
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
li r3, 0
+ mfspr r12,SPRN_SRR1
b pnv_wakeup_loss
53: HMT_LOW
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ed7dfce331e0..3c3146ba62da 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -9,10 +9,17 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-obj-y += string.o alloc.o crtsavres.o code-patching.o \
- feature-fixups.o
+obj-y += string.o alloc.o code-patching.o feature-fixups.o
-obj-$(CONFIG_PPC32) += div64.o copy_32.o
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
+
+# See corresponding test in arch/powerpc/Makefile
+# 64-bit linker creates .sfpr on demand for final link (vmlinux),
+# so it is only needed for modules, and only for older linkers which
+# do not support --save-restore-funcs
+ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
+extra-$(CONFIG_PPC64) += crtsavres.o
+endif
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
@@ -30,7 +37,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
-obj-$(CONFIG_ALTIVEC) += xor_vmx.o
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 500b0f6a0b64..c9de03e0c1f1 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -12,23 +12,186 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <asm/page.h>
-#include <asm/code-patching.h>
+#include <linux/cpuhotplug.h>
+#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/code-patching.h>
-int patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
__put_user_size(instr, addr, 4, err);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+
+ asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+
+ return 0;
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+
+static int text_area_cpu_up(unsigned int cpu)
+{
+ struct vm_struct *area;
+
+ area = get_vm_area(PAGE_SIZE, VM_ALLOC);
+ if (!area) {
+ WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+ cpu);
+ return -1;
+ }
+ this_cpu_write(text_poke_area, area);
+
+ return 0;
+}
+
+static int text_area_cpu_down(unsigned int cpu)
+{
+ free_vm_area(this_cpu_read(text_poke_area));
+ return 0;
+}
+
+/*
+ * Run as a late init call. This allows all the boot time patching to be done
+ * simply by patching the code, and then we're called here prior to
+ * mark_rodata_ro(), which happens after all init calls are run. Although
+ * BUG_ON() is rude, in this case it should only happen on ENOMEM, and we judge
+ * it as being preferable to a kernel that will crash later when someone tries
+ * to use patch_instruction().
+ */
+static int __init setup_text_poke_area(void)
+{
+ BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online", text_area_cpu_up,
+ text_area_cpu_down));
+
+ return 0;
+}
+late_initcall(setup_text_poke_area);
+
+/*
+ * This can be called for kernel text or a module.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
+{
+ unsigned long pfn;
+ int err;
+
+ if (is_vmalloc_addr(addr))
+ pfn = vmalloc_to_pfn(addr);
+ else
+ pfn = __pa_symbol(addr) >> PAGE_SHIFT;
+
+ err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
+ pgprot_val(PAGE_KERNEL));
+
+ pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
+ if (err)
+ return -1;
+
return 0;
}
+static inline int unmap_patch_area(unsigned long addr)
+{
+ pte_t *ptep;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+ if (unlikely(!pgdp))
+ return -EINVAL;
+
+ pudp = pud_offset(pgdp, addr);
+ if (unlikely(!pudp))
+ return -EINVAL;
+
+ pmdp = pmd_offset(pudp, addr);
+ if (unlikely(!pmdp))
+ return -EINVAL;
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (unlikely(!ptep))
+ return -EINVAL;
+
+ pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
+
+ /*
+	 * In hash, pte_clear flushes the TLB; in radix, we have to do it
+	 * explicitly below.
+	 */
+ pte_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ return 0;
+}
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ int err;
+ unsigned int *dest = NULL;
+ unsigned long flags;
+ unsigned long text_poke_addr;
+ unsigned long kaddr = (unsigned long)addr;
+
+ /*
+	 * During early boot, patch_instruction() is called before
+	 * text_poke_area is set up, but we still need to allow patching,
+	 * so fall back to plain old patching in that case. We gate this
+	 * on slab_is_available() and a per-CPU read of text_poke_area:
+	 * per-CPU areas might not be up yet that early in boot, which
+	 * would make a bare this_cpu_read() problematic.
+ */
+ if (!slab_is_available() || !this_cpu_read(text_poke_area))
+ return __patch_instruction(addr, instr);
+
+ local_irq_save(flags);
+
+ text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
+ if (map_patch_area(addr, text_poke_addr)) {
+ err = -1;
+ goto out;
+ }
+
+ dest = (unsigned int *)(text_poke_addr) +
+ ((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
+
+ /*
+ * We use __put_user_size so that we can handle faults while
+ * writing to dest and return err to handle faults gracefully
+ */
+ __put_user_size(instr, dest, 4, err);
+ if (!err)
+ asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
+ ::"r" (dest), "r"(addr));
+
+ err = unmap_patch_area(text_poke_addr);
+ if (err)
+ pr_warn("failed to unmap %lx\n", text_poke_addr);
+
+out:
+ local_irq_restore(flags);
+
+ return err;
+}
+#else /* !CONFIG_STRICT_KERNEL_RWX */
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ return __patch_instruction(addr, instr);
+}
+
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+NOKPROBE_SYMBOL(patch_instruction);
+
int patch_branch(unsigned int *addr, unsigned long target, int flags)
{
return patch_instruction(addr, create_branch(addr, target, flags));
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index a24b4039352c..706b7cc19846 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -82,14 +82,14 @@
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
cmpldi r5,16
- cmpldi cr1,r5,4096
+ cmpldi cr1,r5,3328
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
- bgt cr1,.Lvmx_copy
+ bge cr1,.Lvmx_copy
#else
cmpldi r5,16
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index 18af0b3d3eb2..7e5e1c28e56a 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -44,10 +44,10 @@
#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#ifndef CONFIG_PPC64
-
.section ".text"
+#ifndef CONFIG_PPC64
+
/* Routines for saving integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer save area. */
@@ -314,8 +314,6 @@ _GLOBAL(_restvr_31)
#else /* CONFIG_PPC64 */
- .section ".text.save.restore","ax",@progbits
-
.globl _savegpr0_14
_savegpr0_14:
std r14,-144(r1)
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index f9de69a04e88..4df240aa5f81 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -29,10 +29,7 @@
#define vector __attribute__((vector_size(16)))
#endif
-#include <linux/preempt.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <asm/switch_to.h>
+#include "xor_vmx.h"
typedef vector signed char unative_t;
@@ -64,16 +61,13 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
{
DEFINE(v1);
DEFINE(v2);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -83,23 +77,16 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
v1 += 4;
v2 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_2);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -112,15 +99,11 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
v2 += 4;
v3 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_3);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -128,9 +111,6 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
DEFINE(v4);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -146,15 +126,11 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
v3 += 4;
v4 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_4);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -163,9 +139,6 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
DEFINE(v5);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -184,8 +157,4 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
v4 += 4;
v5 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
new file mode 100644
index 000000000000..4746708451ae
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -0,0 +1,20 @@
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these files ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in);
+
+void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in);
+
+void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in);
+
+void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
new file mode 100644
index 000000000000..6521fe5e8cef
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -0,0 +1,62 @@
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include "xor_vmx.h"
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_2(bytes, v1_in, v2_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
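The exported wrappers above are what the generic xor template machinery ends up calling. A hedged usage sketch — kernel context assumed, xor_demo() and its buffers are made up, and the length must suit the 64-byte inner loop of the __xor_altivec_* cores:

    #include <linux/string.h>
    #include <asm/xor.h>

    static void xor_demo(void)
    {
        static unsigned long a[64], b[64];   /* 512 bytes each */

        memset(a, 0xaa, sizeof(a));
        memset(b, 0xff, sizeof(b));

        /* a[i] ^= b[i] across the region, with AltiVec enabled and
         * preemption disabled by the wrapper */
        xor_altivec_2(sizeof(a), a, b);
    }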
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 6c5025e81236..f4c6472f2fc4 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -88,7 +88,7 @@ static void mmu_mapin_immr(void)
int offset;
for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
- map_page(v + offset, p + offset, f);
+ map_kernel_page(v + offset, p + offset, f);
}
/* Address of instructions to patch */
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 2dc74e5c6458..382528475433 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -227,7 +227,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
do {
SetPageReserved(page);
- map_page(vaddr, page_to_phys(page),
+ map_kernel_page(vaddr, page_to_phys(page),
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
page++;
vaddr += PAGE_SIZE;
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index c6b900f54c07..b1c144b03fcf 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -335,7 +335,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
unsigned long rpn, lp_bits;
int base_psize = 0, actual_psize = 0;
- if (ea <= PAGE_OFFSET)
+ if (ea < PAGE_OFFSET)
return -1;
/* Look in primary table */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3a7d580fdc59..4c422632047b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -206,6 +206,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int is_user = user_mode(regs);
int fault;
int rc = 0, store_update_sp = 0;
@@ -216,7 +217,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* bits we are interested in. But there are some bits which
* indicate errors in DSISR but can validly be set in SRR1.
*/
- if (trap == 0x400)
+ if (is_exec)
error_code &= 0x48200000;
else
is_write = error_code & DSISR_ISSTORE;
@@ -247,13 +248,13 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* The kernel should never take an execute fault nor should it
* take a page fault to a kernel address.
*/
- if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) {
+ if (!is_user && (is_exec || (address >= TASK_SIZE))) {
rc = SIGSEGV;
goto bail;
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64))
+ defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx))
if (error_code & DSISR_DABRMATCH) {
/* breakpoint match */
do_break(regs, address, error_code);
@@ -266,7 +267,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (faulthandler_disabled() || mm == NULL) {
- if (!user_mode(regs)) {
+ if (!is_user) {
rc = SIGSEGV;
goto bail;
}
@@ -287,10 +288,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* can result in fault, which will cause a deadlock when called with
* mmap_sem held
*/
- if (!is_exec && user_mode(regs))
+ if (is_write && is_user)
store_update_sp = store_updates_sp(regs);
- if (user_mode(regs))
+ if (is_user)
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
@@ -309,7 +310,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* thus avoiding the deadlock.
*/
if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->nip))
+ if (!is_user && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
retry:
@@ -509,7 +510,7 @@ bad_area:
bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
- if (user_mode(regs)) {
+ if (is_user) {
_exception(SIGSEGV, regs, code, address);
goto bail;
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 65bb8f33b399..3848af167df9 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
+#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>
@@ -23,6 +24,7 @@
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
@@ -98,6 +100,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 0, va, 0, 0, 0, 0);
}
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -147,6 +150,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 1, va, 0, 0, 0, 0);
}
@@ -181,8 +185,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
+ spin_begin();
while(test_bit(HPTE_LOCK_BIT, word))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
}
@@ -407,6 +413,38 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
tlbie(vpn, psize, psize, ssize, 0);
}
+/*
+ * Remove a bolted kernel entry. Memory hotplug uses this.
+ *
+ * No need to lock here because we should be the only user.
+ */
+static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
+{
+ unsigned long vpn;
+ unsigned long vsid;
+ long slot;
+ struct hash_pte *hptep;
+
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ slot = native_hpte_find(vpn, psize, ssize);
+ if (slot == -1)
+ return -ENOENT;
+
+ hptep = htab_address + slot;
+
+ VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));
+
+ /* Invalidate the hpte */
+ hptep->v = 0;
+
+ /* Invalidate the TLB */
+ tlbie(vpn, psize, psize, ssize, 0);
+ return 0;
+}
+
+
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int bpsize, int apsize, int ssize, int local)
{
@@ -725,6 +763,7 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
mmu_hash_ops.hpte_insert = native_hpte_insert;
mmu_hash_ops.hpte_remove = native_hpte_remove;
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f2095ce9d4b0..7a20669c19e7 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -810,6 +810,8 @@ static void update_hid_for_hash(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 0);
+
/*
* now switch the HID
*/
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c41dc44472c5..e1bf5ca397fe 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -34,6 +34,7 @@
#define PAGE_SHIFT_16G 34
unsigned int HPAGE_SHIFT;
+EXPORT_SYMBOL(HPAGE_SHIFT);
/*
* Tracks gpages after the device tree is scanned and before the
@@ -79,7 +80,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
num_hugepd = 1;
}
- new = kmem_cache_zalloc(cachep, GFP_KERNEL);
+ new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -945,7 +946,7 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
if (pmd_none(pmd))
return NULL;
- if (pmd_trans_huge(pmd)) {
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
if (is_thp)
*is_thp = true;
ret_pte = (pte_t *) pmdp;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ec84b31c6c86..5b4c25d12ff3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -110,8 +111,29 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
return 0;
}
+/*
+ * vmemmap virtual address space management does not have a traditional page
+ * table to track which virtual struct pages are backed by a physical mapping.
+ * The virtual-to-physical mappings are tracked in a simple linked list
+ * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
+ * all times, whereas the 'next' list maintains the available
+ * vmemmap_backing structures which have been deleted from the
+ * 'vmemmap_list' during system runtime (memory hotplug remove
+ * operation). The freed 'vmemmap_backing' structures are reused later when
+ * new requests come in without allocating fresh memory. The 'next' pointer
+ * also tracks the allocated 'vmemmap_backing' structures, as we allocate
+ * one full page of memory at a time when we don't have any.
+ */
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;
+
+/*
+ * The same pointer 'next' tracks individual chunks inside the allocated
+ * full page during boot, and again tracks the freed nodes during
+ * runtime. This dual use would be racy, but the two uses are separated
+ * in time by the boot process, so the race cannot occur in practice.
+ * It would become a problem if a memory hotplug operation could somehow
+ * happen during boot.
+ */
static int num_left;
static int num_freed;
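
The allocation side these comments describe looks roughly like the sketch below (condensed from vmemmap_list_alloc() in this file, allocation-failure handling omitted): freed nodes are consumed first, and fresh chunks are otherwise carved out of a whole page tracked by 'next' and 'num_left'.

    static struct vmemmap_backing *vmemmap_list_alloc(int node)
    {
            struct vmemmap_backing *vmem_back;

            /* Reuse a node returned by an earlier hot-remove, if any. */
            if (num_freed) {
                    num_freed--;
                    vmem_back = next;
                    next = next->list;
                    return vmem_back;
            }

            /* Otherwise carve the next chunk out of the current page. */
            if (!num_left) {
                    next = vmemmap_alloc_block(PAGE_SIZE, node);
                    num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
            }
            num_left--;
            return next++;
    }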
@@ -171,13 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
+ struct vmem_altmap *altmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- p = vmemmap_alloc_block(page_size, node);
+ /* altmap lookups only work at section boundaries */
+ altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
+
+ p = __vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
return -ENOMEM;
@@ -234,13 +260,17 @@ static unsigned long vmemmap_list_free(unsigned long start)
void __ref vmemmap_free(unsigned long start, unsigned long end)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+ unsigned long page_order = get_order(page_size);
start = _ALIGN_DOWN(start, page_size);
pr_debug("vmemmap_free %lx...%lx\n", start, end);
for (; start < end; start += page_size) {
- unsigned long addr;
+ unsigned long nr_pages, addr;
+ struct vmem_altmap *altmap;
+ struct page *section_base;
+ struct page *page;
/*
* the section has already been marked as invalid, so
@@ -251,29 +281,33 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
continue;
addr = vmemmap_list_free(start);
- if (addr) {
- struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
-
- if (PageReserved(page)) {
- /* allocated from bootmem */
- if (page_size < PAGE_SIZE) {
- /*
- * this shouldn't happen, but if it is
- * the case, leave the memory there
- */
- WARN_ON_ONCE(1);
- } else {
- unsigned int nr_pages =
- 1 << get_order(page_size);
- while (nr_pages--)
- free_reserved_page(page++);
- }
- } else
- free_pages((unsigned long)(__va(addr)),
- get_order(page_size));
-
- vmemmap_remove_mapping(start, page_size);
+ if (!addr)
+ continue;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ section_base = pfn_to_page(vmemmap_section_start(start));
+ nr_pages = 1 << page_order;
+
+ altmap = to_vmem_altmap((unsigned long) section_base);
+ if (altmap) {
+ vmem_altmap_free(altmap, nr_pages);
+ } else if (PageReserved(page)) {
+ /* allocated from bootmem */
+ if (page_size < PAGE_SIZE) {
+ /*
+ * this shouldn't happen, but if it is
+ * the case, leave the memory there
+ */
+ WARN_ON_ONCE(1);
+ } else {
+ while (nr_pages--)
+ free_reserved_page(page++);
+ }
+ } else {
+ free_pages((unsigned long)(__va(addr)), page_order);
}
+
+ vmemmap_remove_mapping(start, page_size);
}
}
#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index de5a90e1ceaa..8541f18694a4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -36,6 +36,7 @@
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -151,11 +152,20 @@ int arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
+ struct vmem_altmap *altmap;
+ struct page *page;
int ret;
- zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ /*
+ * If we have an altmap then we need to skip over any reserved PFNs
+ * when querying the zone.
+ */
+ page = pfn_to_page(start_pfn);
+ altmap = to_vmem_altmap((unsigned long) page);
+ if (altmap)
+ page += vmem_altmap_offset(altmap);
+
+ ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
if (ret)
return ret;
@@ -305,11 +315,11 @@ void __init paging_init(void)
unsigned long end = __fix_to_virt(FIX_HOLE);
for (; v < end; v += PAGE_SIZE)
- map_page(v, 0, 0); /* XXX gross */
+ map_kernel_page(v, 0, 0); /* XXX gross */
#endif
#ifdef CONFIG_HIGHMEM
- map_page(PKMAP_BASE, 0, 0); /* XXX gross */
+ map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a3edf813d455..71de2c6d88f3 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -235,10 +235,15 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- asm volatile("isync": : :"memory");
- mtspr(SPRN_PID, next->context.id);
- asm volatile("isync \n"
- PPC_SLBIA(0x7)
- : : :"memory");
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ isync();
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ } else {
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ }
}
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index f988db655e5b..d46128b22150 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -94,7 +94,6 @@ extern void _tlbia(void);
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
-extern int map_page(unsigned long va, phys_addr_t pa, int flags);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 5fcb3dd74c13..31eed8fa8e99 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -32,7 +32,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
{
int changed;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
changed = !pmd_same(*(pmdp), entry);
@@ -59,7 +59,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
#ifdef CONFIG_DEBUG_VM
WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(&mm->page_table_lock);
- WARN_ON(!pmd_trans_huge(pmd));
+ WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 8b85a14b08ea..188b4107584d 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -11,8 +11,12 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/mm.h>
#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mmu.h>
#include <asm/tlb.h>
#include "mmu_decl.h"
@@ -22,6 +26,81 @@
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
+ * vmemmap is the starting address of the virtual address space where
+ * struct pages are allocated for all possible PFNs present on the system
+ * including holes and bad memory (hence sparse). These virtual struct
+ * pages are stored in sequence in this virtual address space irrespective
+ * of whether the corresponding PFN is valid. This achieves a constant
+ * relationship between the address of a struct page and its PFN.
+ *
+ * During boot, or during a memory hotplug operation when a new memory
+ * section is added, physical memory allocation (including hash table
+ * bolting) is performed for the set of struct pages which are part of
+ * the memory section. This saves memory by not allocating struct pages
+ * for PFNs which are not valid.
+ *
+ * ----------------------------------------------
+ * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
+ * ----------------------------------------------
+ *
+ * f000000000000000 c000000000000000
+ * vmemmap +--------------+ +--------------+
+ * + | page struct | +--------------> | page struct |
+ * | +--------------+ +--------------+
+ * | | page struct | +--------------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | + +------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | | +--> | page struct |
+ * | +--------------+ | | +--------------+
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | +-------+ |
+ * | +--------------+ |
+ * | | page struct | +-----------+
+ * | +--------------+
+ * | | page struct | No mapping
+ * | +--------------+
+ * | | page struct | No mapping
+ * v +--------------+
+ *
+ * -----------------------------------------
+ * | RELATION BETWEEN STRUCT PAGES AND PFNS|
+ * -----------------------------------------
+ *
+ * vmemmap +--------------+ +---------------+
+ * + | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * v +--------------+ +---------------+
+ */
+/*
* On hash-based CPUs, the vmemmap is bolted in the hash table.
*
*/
@@ -109,7 +188,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -141,6 +220,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
pmd = *pmdp;
pmd_clear(pmdp);
@@ -221,6 +301,7 @@ void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* We can't mark the pmd none here, because that will cause a race
@@ -342,3 +423,35 @@ int hash__has_transparent_hugepage(void)
return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void hash__mark_rodata_ro(void)
+{
+ unsigned long start = (unsigned long)_stext;
+ unsigned long end = (unsigned long)__init_begin;
+ unsigned long idx;
+ unsigned int step, shift;
+ unsigned long newpp = PP_RXXX;
+
+ shift = mmu_psize_defs[mmu_linear_psize].shift;
+ step = 1 << shift;
+
+ start = ((start + step - 1) >> shift) << shift;
+ end = (end >> shift) << shift;
+
+ pr_devel("marking ro start %lx, end %lx, step %x\n",
+ start, end, step);
+
+ if (start == end) {
+ pr_warn("could not set rodata ro, relocate the start"
+ " of the kernel to a 0x%x boundary\n", step);
+ return;
+ }
+
+ for (idx = start; idx < end; idx += step)
+ /* Not sure if we can do much with the return value */
+ mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+ mmu_kernel_ssize);
+
+}
+#endif
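
A worked example of the alignment in hash__mark_rodata_ro(), assuming the common 16M linear page size and roughly 22M of kernel text:

    /*
     * shift = 24, step = 1 << 24 = 0x1000000 (16M)
     * _stext       = 0xc000000000000000 -> start = 0xc000000000000000
     * __init_begin = 0xc000000001600000 -> end   = 0xc000000001000000
     *
     * One bolted 16M entry is flipped to R-X; the 6M tail below
     * __init_begin is simply left writable by this pass. If the text
     * did not span a full 16M-aligned region, start == end and the
     * pr_warn() path fires instead.
     */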
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c28165d8970b..8c13e4282308 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -11,6 +11,7 @@
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
+#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -19,6 +20,8 @@
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/trace.h>
#include <trace/events/thp.h>
@@ -108,6 +111,49 @@ set_the_pte:
return 0;
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void radix__mark_rodata_ro(void)
+{
+ unsigned long start = (unsigned long)_stext;
+ unsigned long end = (unsigned long)__init_begin;
+ unsigned long idx;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = PAGE_ALIGN(end); /* aligns up */
+
+ pr_devel("marking ro start %lx, end %lx\n", start, end);
+
+ for (idx = start; idx < end; idx += PAGE_SIZE) {
+ pgdp = pgd_offset_k(idx);
+ pudp = pud_alloc(&init_mm, pgdp, idx);
+ if (!pudp)
+ continue;
+ if (pud_huge(*pudp)) {
+ ptep = (pte_t *)pudp;
+ goto update_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, idx);
+ if (!pmdp)
+ continue;
+ if (pmd_huge(*pmdp)) {
+ ptep = pmdp_ptep(pmdp);
+ goto update_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, idx);
+ if (!ptep)
+ continue;
+update_the_pte:
+ radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0);
+ }
+
+ radix__flush_tlb_kernel_range(start, end);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
static inline void __meminit print_mapping(unsigned long start,
unsigned long end,
unsigned long size)
@@ -121,7 +167,14 @@ static inline void __meminit print_mapping(unsigned long start,
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end)
{
- unsigned long addr, mapping_size = 0;
+ unsigned long vaddr, addr, mapping_size = 0;
+ pgprot_t prot;
+ unsigned long max_mapping_size;
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ int split_text_mapping = 1;
+#else
+ int split_text_mapping = 0;
+#endif
start = _ALIGN_UP(start, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
@@ -130,9 +183,12 @@ static int __meminit create_physical_mapping(unsigned long start,
gap = end - addr;
previous_size = mapping_size;
+ max_mapping_size = PUD_SIZE;
+retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
- mmu_psize_defs[MMU_PAGE_1G].shift)
+ mmu_psize_defs[MMU_PAGE_1G].shift &&
+ PUD_SIZE <= max_mapping_size)
mapping_size = PUD_SIZE;
else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
mmu_psize_defs[MMU_PAGE_2M].shift)
@@ -140,13 +196,32 @@ static int __meminit create_physical_mapping(unsigned long start,
else
mapping_size = PAGE_SIZE;
+ if (split_text_mapping && (mapping_size == PUD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext)) {
+ max_mapping_size = PMD_SIZE;
+ goto retry;
+ }
+
+ if (split_text_mapping && (mapping_size == PMD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext))
+ mapping_size = PAGE_SIZE;
+
if (mapping_size != previous_size) {
print_mapping(start, addr, previous_size);
start = addr;
}
- rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
- PAGE_KERNEL_X, mapping_size);
+ vaddr = (unsigned long)__va(addr);
+
+ if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+ prot = PAGE_KERNEL_X;
+ else
+ prot = PAGE_KERNEL;
+
+ rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
if (rc)
return rc;
}
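
With CONFIG_STRICT_KERNEL_RWX the retry above keeps huge pages away from the kernel text, so the later X/RW split can land on page boundaries. A hypothetical walk-through, assuming _stext at physical 0 and __init_begin at 22M:

    /*
     * addr = 0, candidate PUD_SIZE (1G): overlaps [_stext, __init_begin],
     *     so retry with max_mapping_size = PMD_SIZE.
     * addr in [0, 22M], candidate PMD_SIZE (2M): still overlaps the text
     *     region, so fall back to PAGE_SIZE (4K) mappings.
     * past __init_begin: no overlap, the normal 2M/1G selection resumes.
     */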
@@ -190,6 +265,7 @@ static void __init radix_init_pgtable(void)
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
}
static void __init radix_init_partition_table(void)
@@ -316,6 +392,9 @@ static void update_hid_for_radix(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 1);
+ trace_tlbie(0, 0, rb, 0, 2, 1, 1);
+
/*
* now switch the HID
*/
@@ -683,7 +762,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -701,6 +780,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* khugepaged calls this for normal pmd
*/
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a65c0b4c0669..a9e4bfc025bc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -60,7 +60,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_ZERO;
+ gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
@@ -189,7 +189,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
err = 0;
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_page(v+i, p+i, flags);
+ err = map_kernel_page(v+i, p+i, flags);
if (err) {
if (slab_is_available())
vunmap((void *)v);
@@ -215,7 +215,7 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_page(unsigned long va, phys_addr_t pa, int flags)
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
{
pmd_t *pd;
pte_t *pg;
@@ -255,7 +255,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
- map_page(v, p, f);
+ map_kernel_page(v, p, f);
#ifdef CONFIG_PPC_STD_MMU_32
if (ktext)
hash_preload(&init_mm, v, 0, 0x300);
@@ -387,11 +387,6 @@ void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
return;
}
- map_page(address, phys, pgprot_val(flags));
+ map_kernel_page(address, phys, pgprot_val(flags));
fixmaps++;
}
-
-void __this_fixmap_does_not_exist(void)
-{
- WARN_ON(1);
-}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index db93cf747a03..5c0b795d656c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -47,6 +47,7 @@
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
+#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -323,7 +324,7 @@ struct page *pud_page(pud_t pud)
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_trans_huge(pmd) || pmd_huge(pmd))
+ if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
return pte_page(pmd_pte(pmd));
return virt_to_page(pmd_page_vaddr(pmd));
}
@@ -351,12 +352,20 @@ static pte_t *get_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
- if (!page)
- return NULL;
- if (!kernel && !pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ struct page *page;
+
+ if (!kernel) {
+ page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ } else {
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
}
ret = page_address(page);
@@ -469,13 +478,31 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
* use of this partition ID was, not the new use.
*/
asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR)
+ if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- else
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
+ } else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
+{
+ if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+ pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
+ return;
+ }
+
+ if (radix_enabled())
+ radix__mark_rodata_ro();
+ else
+ hash__mark_rodata_ro();
+}
+#endif
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 654a0d7ba0e7..13cfe413b40d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -33,15 +33,7 @@ enum slb_index {
KSTACK_INDEX = 2, /* Kernel stack map */
};
-extern void slb_allocate_realmode(unsigned long ea);
-
-static void slb_allocate(unsigned long ea)
-{
- /* Currently, we do real mode for all SLBs including user, but
- * that will change if we bring back dynamic VSIDs
- */
- slb_allocate_realmode(ea);
-}
+extern void slb_allocate(unsigned long ea);
#define slb_esid_mask(ssize) \
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1519617aab36..bde378559d01 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -65,14 +65,15 @@ MMU_FTR_SECTION_ELSE \
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
-/* void slb_allocate_realmode(unsigned long ea);
+/* void slb_allocate(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
+ * r3 is preserved.
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate_realmode)
+_GLOBAL(slb_allocate)
/*
* check for bad kernel/user address
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
@@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
* dont have any LRU information to help us choose a slot.
*/
+ mr r9,r3
+
+ /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
7: ld r10,PACASTABRR(r13)
addi r10,r10,1
/* This gets soft patched on boot. */
@@ -249,10 +253,10 @@ slb_compare_rr_to_size:
std r10,PACASTABRR(r13)
3:
- rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
- oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
+ rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */
+ oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
- /* r3 = ESID data, r11 = VSID data */
+ /* r9 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -265,21 +269,21 @@ slb_compare_rr_to_size:
bgelr cr7
/* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
+ lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
+ cmpldi r9,SLB_CACHE_ENTRIES
bge 1f
/* still room in the slb cache */
- sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
+ sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
srdi r10,r10,28 /* get the 36 bits of the ESID */
add r11,r11,r13 /* r11 = (u32 *)paca + offset */
stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
+ addi r9,r9,1 /* offset++ */
b 2f
1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
+ li r9,SLB_CACHE_ENTRIES+1
2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
crclr 4*cr0+eq /* set result to "success" */
blr
@@ -301,11 +305,11 @@ slb_compare_rr_to_size:
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
/* r3 = EA, r11 = VSID data */
- clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
+ clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
b 7b
-_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode)
+_ASM_NOKPROBE_SYMBOL(slb_allocate)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 02e71402fdd3..744e0164ecf5 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -16,6 +16,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#define RIC_FLUSH_TLB 0
@@ -35,6 +36,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
/*
@@ -87,6 +89,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
@@ -104,6 +107,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -121,6 +125,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
/*
@@ -377,6 +382,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
@@ -394,6 +400,7 @@ void radix__flush_tlb_lpid(unsigned long lpid)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid);
@@ -420,12 +427,14 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entries by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 4517aa43a8b1..b5b0fb97b9c0 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -93,12 +93,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/*
* Check if we have an active batch on this CPU. If not, just
- * flush now and return. For now, we don global invalidates
- * in that case, might be worth testing the mm cpu mask though
- * and decide to use local invalidates instead...
+ * flush now and return.
*/
if (!batch->active) {
- flush_hash_page(vpn, rpte, psize, ssize, 0);
+ flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
put_cpu_var(ppc64_tlb_batch);
return;
}
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 7b2ca16b1eb4..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
@@ -27,6 +28,12 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"
+/* Version of the 24x7 hypervisor API to use on this machine. */
+static int interface_version;
+
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
static bool domain_is_valid(unsigned domain)
{
switch (domain) {
@@ -54,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
}
}
+/* Domains for which more than one result element is returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+ return aggregate_result_elements &&
+ (domain == HV_PERF_DOMAIN_PHYS_CORE ||
+ (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+ domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
static const char *domain_name(unsigned domain)
{
if (!domain_is_valid(domain))
@@ -74,7 +90,11 @@ static const char *domain_name(unsigned domain)
static bool catalog_entry_domain_is_valid(unsigned domain)
{
- return is_physical_domain(domain);
+ /* POWER8 doesn't support virtual domains. */
+ if (interface_version == 1)
+ return is_physical_domain(domain);
+ else
+ return domain_is_valid(domain);
}
/*
@@ -166,6 +186,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static unsigned int max_num_requests(int interface_version)
+{
+ return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+ / H24x7_REQUEST_SIZE(interface_version);
+}
+
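
Worked numbers for max_num_requests(), assuming H24x7_DATA_BUFFER_SIZE is 4096 and the 16-byte request-buffer header (interface_version, num_requests, reserved[0xE]):

    /*
     * v1: (4096 - 16) / 16 = 255 requests (matches the old "> 254" limit)
     * v2: (4096 - 16) / 32 = 127 requests
     */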
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
*len = be16_to_cpu(ev->event_name_len) - 2;
@@ -260,9 +286,8 @@ static void *event_end(struct hv_24x7_event_data *ev, void *end)
return start + nl + dl + ldl;
}
-static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
- unsigned long version,
- unsigned long index)
+static long h_get_24x7_catalog_page_(unsigned long phys_4096,
+ unsigned long version, unsigned long index)
{
pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
phys_4096, version, index);
@@ -273,8 +298,7 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
phys_4096, version, index);
}
-static unsigned long h_get_24x7_catalog_page(char page[],
- u64 version, u32 index)
+static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
{
return h_get_24x7_catalog_page_(virt_to_phys(page),
version, index);
@@ -664,13 +688,13 @@ static int create_events_from_catalog(struct attribute ***events_,
struct attribute ***event_descs_,
struct attribute ***event_long_descs_)
{
- unsigned long hret;
+ long hret;
size_t catalog_len, catalog_page_len, event_entry_count,
event_data_len, event_data_offs,
event_data_bytes, junk_events, event_idx, event_attr_ct, i,
attr_max, event_idx_last, desc_ct, long_desc_ct;
ssize_t ct, ev_len;
- uint32_t catalog_version_num;
+ uint64_t catalog_version_num;
struct attribute **events, **event_descs, **event_long_descs;
struct hv_24x7_catalog_page_0 *page_0 =
kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
@@ -706,8 +730,8 @@ static int create_events_from_catalog(struct attribute ***events_,
event_data_offs = be16_to_cpu(page_0->event_data_offs);
event_data_len = be16_to_cpu(page_0->event_data_len);
- pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
- (size_t)catalog_version_num, catalog_len,
+ pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+ catalog_version_num, catalog_len,
event_entry_count, event_data_offs, event_data_len);
if ((MAX_4K < event_data_len)
@@ -761,8 +785,8 @@ static int create_events_from_catalog(struct attribute ***events_,
catalog_version_num,
i + event_data_offs);
if (hret) {
- pr_err("failed to get event data in page %zu\n",
- i + event_data_offs);
+ pr_err("Failed to get event data in page %zu: rc=%ld\n",
+ i + event_data_offs, hret);
ret = -EIO;
goto e_event_data;
}
@@ -903,7 +927,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
- unsigned long hret;
+ long hret;
ssize_t ret = 0;
size_t catalog_len = 0, catalog_page_len = 0;
loff_t page_offset = 0;
@@ -988,7 +1012,7 @@ static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
char *buf) \
{ \
- unsigned long hret; \
+ long hret; \
ssize_t ret = 0; \
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
struct hv_24x7_catalog_page_0 *page_0 = page; \
@@ -1040,21 +1064,6 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
- struct hv_24x7_data_result_buffer *result_buffer,
- unsigned long ret)
-{
- struct hv_24x7_request *req;
-
- req = &request_buffer->requests[0];
- pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
- "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
- req->performance_domain, req->data_offset,
- req->starting_ix, req->starting_lpar_ix, ret, ret,
- result_buffer->detailed_rc,
- result_buffer->failing_request_ix);
-}
-
/*
* Start the process for a new H_GET_24x7_DATA hcall.
*/
@@ -1062,10 +1071,10 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- memset(request_buffer, 0, 4096);
- memset(result_buffer, 0, 4096);
+ memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+ memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
- request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+ request_buffer->interface_version = interface_version;
/* memset above set request_buffer->num_requests to 0 */
}
@@ -1076,7 +1085,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- unsigned long ret;
+ long ret;
/*
* NOTE: Due to variable number of array elements in request and
*       result buffer(s), sizeof() is not reliable. Use the actual
*       allocated buffer size, H24x7_DATA_BUFFER_SIZE.
*/
ret = plpar_hcall_norets(H_GET_24X7_DATA,
@@ -1087,10 +1096,19 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
- if (ret)
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret) {
+ struct hv_24x7_request *req;
+
+ req = request_buffer->requests;
+ pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+ req->performance_domain, req->data_offset,
+ req->starting_ix, req->starting_lpar_ix,
+ ret, ret, result_buffer->detailed_rc,
+ result_buffer->failing_request_ix);
+ return -EIO;
+ }
- return ret;
+ return 0;
}
/*
@@ -1105,9 +1123,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
{
u16 idx;
int i;
+ size_t req_size;
struct hv_24x7_request *req;
- if (request_buffer->num_requests > 254) {
+ if (request_buffer->num_requests >=
+ max_num_requests(request_buffer->interface_version)) {
pr_devel("Too many requests for 24x7 HCALL %d\n",
request_buffer->num_requests);
return -EINVAL;
@@ -1124,23 +1144,113 @@ static int add_event_to_24x7_request(struct perf_event *event,
idx = event_get_vcpu(event);
}
+ req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+
i = request_buffer->num_requests++;
- req = &request_buffer->requests[i];
+ req = (void *) request_buffer->requests + i * req_size;
req->performance_domain = event_get_domain(event);
req->data_size = cpu_to_be16(8);
req->data_offset = cpu_to_be32(event_get_offset(event));
- req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
+ req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
req->max_num_lpars = cpu_to_be16(1);
req->starting_ix = cpu_to_be16(idx);
req->max_ix = cpu_to_be16(1);
+ if (request_buffer->interface_version > 1) {
+ if (domain_needs_aggregation(req->performance_domain))
+ req->max_num_thread_groups = -1;
+ else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+ req->starting_thread_group_ix = idx % 2;
+ req->max_num_thread_groups = 1;
+ }
+ }
+
+ return 0;
+}
+
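
The byte arithmetic for 'req' above is deliberate: once the v2 fields are added, sizeof(struct hv_24x7_request) is the 32-byte v2 size, so indexing requests[] as a plain C array would over-stride on a v1 (POWER8) buffer. A sketch of the addressing rule, using the H24x7_REQUEST_SIZE macro from hv-24x7.h:

    /* Hypothetical helper: address request slot i for either layout. */
    static struct hv_24x7_request *
    request_slot(struct hv_24x7_request_buffer *buf, int i)
    {
            size_t sz = H24x7_REQUEST_SIZE(buf->interface_version);

            return (void *)buf->requests + i * sz;
    }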
+/**
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
+ *
+ * @event: Event associated with @res.
+ * @resb: Result buffer containing @res.
+ * @res: Result to work on.
+ * @countp: Output variable containing the event count.
+ * @next: Optional output variable pointing to the next result in @resb.
+ */
+static int get_count_from_result(struct perf_event *event,
+ struct hv_24x7_data_result_buffer *resb,
+ struct hv_24x7_result *res, u64 *countp,
+ struct hv_24x7_result **next)
+{
+ u16 num_elements = be16_to_cpu(res->num_elements_returned);
+ u16 data_size = be16_to_cpu(res->result_element_data_size);
+ unsigned int data_offset;
+ void *element_data;
+ int i;
+ u64 count;
+
+ /*
+ * We can bail out early if the result is empty.
+ */
+ if (!num_elements) {
+ pr_debug("Result of request %hhu is empty, nothing to do\n",
+ res->result_ix);
+
+ if (next)
+ *next = (struct hv_24x7_result *) res->elements;
+
+ return -ENODATA;
+ }
+
+ /*
+ * Since we always specify 1 as the maximum for the smallest resource
+ * we're requesting, there should be only one element per result,
+ * except when an event needs aggregation, in which case there are more.
+ */
+ if (num_elements != 1 &&
+ !domain_needs_aggregation(event_get_domain(event))) {
+ pr_err("Error: result of request %hhu has %hu elements\n",
+ res->result_ix, num_elements);
+
+ return -EIO;
+ }
+
+ if (data_size != sizeof(u64)) {
+ pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+ res->result_ix, data_size);
+
+ return -ENOTSUPP;
+ }
+
+ if (resb->interface_version == 1)
+ data_offset = offsetof(struct hv_24x7_result_element_v1,
+ element_data);
+ else
+ data_offset = offsetof(struct hv_24x7_result_element_v2,
+ element_data);
+
+ /* Go through the result elements in the result. */
+ for (i = count = 0, element_data = res->elements + data_offset;
+ i < num_elements;
+ i++, element_data += data_size + data_offset)
+ count += be64_to_cpu(*((u64 *) element_data));
+
+ *countp = count;
+
+ /* The next result is after the last result element. */
+ if (next)
+ *next = element_data - data_offset;
+
return 0;
}
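
The loop's stride can be checked by hand against the element layouts in the hv-24x7.h hunk further down, assuming the usual 8-byte counters:

    /*
     * v1: offsetof(element_data) = 2 + 2 + 4         =  8, stride 16 bytes
     * v2: offsetof(element_data) = 2 + 2 + 4 + 1 + 7 = 16, stride 24 bytes
     *
     * After the loop, element_data points one stride past the last element,
     * so 'element_data - data_offset' is exactly where the next
     * hv_24x7_result begins.
     */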
-static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
+static int single_24x7_request(struct perf_event *event, u64 *count)
{
- unsigned long ret;
+ int ret;
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
@@ -1157,13 +1267,12 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
goto out;
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto out;
- }
/* process result from hcall */
- *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
+ ret = get_count_from_result(event, result_buffer,
+ result_buffer->results, count, NULL);
out:
put_cpu_var(hv_24x7_reqb);
@@ -1216,9 +1325,8 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* Domains above 6 are invalid */
domain = event_get_domain(event);
- if (domain > 6) {
+ if (domain >= HV_PERF_DOMAIN_MAX) {
pr_devel("invalid domain %d\n", domain);
return -EINVAL;
}
@@ -1250,10 +1358,9 @@ static int h_24x7_event_init(struct perf_event *event)
static u64 h_24x7_get_value(struct perf_event *event)
{
- unsigned long ret;
u64 ct;
- ret = single_24x7_request(event, &ct);
- if (ret)
+
+ if (single_24x7_request(event, &ct))
/* We checked this in event init, shouldn't fail here... */
return 0;
@@ -1396,8 +1503,7 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
{
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
- struct hv_24x7_result *resb;
- struct perf_event *event;
+ struct hv_24x7_result *res, *next_res;
u64 count;
int i, ret, txn_flags;
struct hv_24x7_hw *h24x7hw;
@@ -1417,19 +1523,21 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
result_buffer = (void *)get_cpu_var(hv_24x7_resb);
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto put_reqb;
- }
h24x7hw = &get_cpu_var(hv_24x7_hw);
- /* Update event counts from hcall */
- for (i = 0; i < request_buffer->num_requests; i++) {
- resb = &result_buffer->results[i];
- count = be64_to_cpu(resb->elements[0].element_data[0]);
- event = h24x7hw->events[i];
- h24x7hw->events[i] = NULL;
+ /* Go through results in the result buffer to update event counts. */
+ for (i = 0, res = result_buffer->results;
+ i < result_buffer->num_results; i++, res = next_res) {
+ struct perf_event *event = h24x7hw->events[res->result_ix];
+
+ ret = get_count_from_result(event, result_buffer, res, &count,
+ &next_res);
+ if (ret)
+ break;
+
update_event_count(event, count);
}
@@ -1480,6 +1588,18 @@ static int hv_24x7_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
+ } else if (!cur_cpu_spec->oprofile_cpu_type)
+ return -ENODEV;
+
+ /* POWER8 only supports v1, while POWER9 only supports v2. */
+ if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ interface_version = 1;
+ else {
+ interface_version = 2;
+
+ /* SMT8 in POWER9 needs to aggregate result elements. */
+ if (threads_per_core == 8)
+ aggregate_result_elements = true;
}
hret = hv_perf_caps_get(&caps);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 634ef4082cdc..5092c4a222a6 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -10,6 +10,8 @@ enum hv_perf_domains {
HV_PERF_DOMAIN_MAX,
};
+#define H24x7_REQUEST_SIZE(iface_version) ((iface_version) == 1 ? 16 : 32)
+
struct hv_24x7_request {
/* PHYSICAL domains require enabling via phyp/hmc. */
__u8 performance_domain;
@@ -42,19 +44,27 @@ struct hv_24x7_request {
/* chip, core, or virtual processor based on @performance_domain */
__be16 starting_ix;
__be16 max_ix;
+
+ /* The following fields were added in v2 of the 24x7 interface. */
+
+ __u8 starting_thread_group_ix;
+
+ /* -1 means all thread groups starting at @starting_thread_group_ix */
+ __u8 max_num_thread_groups;
+
+ __u8 reserved2[0xE];
} __packed;
struct hv_24x7_request_buffer {
/* 0 - ? */
/* 1 - ? */
-#define HV_24X7_IF_VERSION_CURRENT 0x01
__u8 interface_version;
__u8 num_requests;
__u8 reserved[0xE];
- struct hv_24x7_request requests[1];
+ struct hv_24x7_request requests[];
} __packed;
-struct hv_24x7_result_element {
+struct hv_24x7_result_element_v1 {
__be16 lpar_ix;
/*
@@ -67,10 +77,38 @@ struct hv_24x7_result_element {
__be32 lpar_cfg_instance_id;
/* size = @result_element_data_size of containing result. */
- __u64 element_data[1];
+ __u64 element_data[];
+} __packed;
+
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
+ __be16 lpar_ix;
+
+ /*
+ * represents the core, chip, or virtual processor based on the
+ * request's @performance_domain
+ */
+ __be16 domain_ix;
+
+ /* -1 if @performance_domain does not refer to a virtual processor */
+ __be32 lpar_cfg_instance_id;
+
+ __u8 thread_group_ix;
+
+ __u8 reserved[7];
+
+ /* size = @result_element_data_size of containing result. */
+ __u64 element_data[];
} __packed;
struct hv_24x7_result {
+ /*
+ * The index of the 24x7 Request Structure in the 24x7 Request Buffer
+ * used to request this result.
+ */
__u8 result_ix;
/*
@@ -81,14 +119,25 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the corresponding hv_24x7_request */
+ /*
+ * This is a copy of @data_size from the corresponding hv_24x7_request
+ *
+ * Warning: to obtain the size of each element in @elements you have
+ * to add the size of the other members of the result_element struct.
+ */
__be16 result_element_data_size;
__u8 reserved[0x2];
- /* WARNING: only valid for first result element due to variable sizes
- * of result elements */
- /* struct hv_24x7_result_element[@num_elements_returned] */
- struct hv_24x7_result_element elements[1];
+ /*
+ * Either
+ * struct hv_24x7_result_element_v1[@num_elements_returned]
+ * or
+ * struct hv_24x7_result_element_v2[@num_elements_returned]
+ *
+ * depending on the interface_version field of the
+ * struct hv_24x7_data_result_buffer containing this result.
+ */
+ char elements[];
} __packed;
struct hv_24x7_data_result_buffer {
@@ -104,7 +153,7 @@ struct hv_24x7_data_result_buffer {
__u8 reserved2[0x8];
/* WARNING: only valid for the first result due to variable sizes of
* results */
- struct hv_24x7_result results[1]; /* [@num_results] */
+ struct hv_24x7_result results[]; /* [@num_results] */
} __packed;
#endif
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 71a6bfee5c02..80204e064362 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e)
EVENT(PM_ICT_NOSLOT_CYC, 0x100f8)
EVENT(PM_CMPLU_STALL, 0x1e054)
EVENT(PM_INST_CMPL, 0x00002)
-EVENT(PM_BRU_CMPL, 0x10012)
+EVENT(PM_BRU_CMPL, 0x4d05e)
EVENT(PM_BR_MPRED_CMPL, 0x400f6)
/* All L1 D cache load references counted at finish, gated by reject */
@@ -56,3 +56,5 @@ EVENT(PM_RUN_CYC, 0x600f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Alternate Branch event code */
+EVENT(PM_BR_CMPL_ALT, 0x10012)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index bb28e1a41257..f17435e4a489 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -231,7 +231,7 @@ static int power9_generic_events_dd1[] = {
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT,
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
[PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
@@ -453,6 +453,12 @@ static int __init init_power9_pmu(void)
* sampling scenarios in power9 DD1, instead use PM_INST_DISP.
*/
EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP;
+ /*
+ * Power9 DD1 should use PM_BR_CMPL_ALT event code for
+ * "branches" to provide correct counter value.
+ */
+ EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT;
+ EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT;
rc = register_power_pmu(&power9_isa207_pmu);
} else {
rc = register_power_pmu(&power9_pmu);
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 9b0afe935cc1..01cb109ebf17 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -199,6 +199,18 @@ config CURRITUCK
help
This option enables support for the IBM Currituck (476fpe) evaluation board
+config FSP2
+ bool "IBM FSP2 (476fpe) Support"
+ depends on PPC_47x
+ default n
+ select 476FPE
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select COMMON_CLK
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the IBM FSP2 (476fpe) board
+
config AKEBONO
bool "IBM Akebono (476gtr) Support"
depends on PPC_47x
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 26d35b5941f7..72b824160660 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_ISS4xx) += iss4xx.o
obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
obj-$(CONFIG_CURRITUCK) += ppc476.o
obj-$(CONFIG_AKEBONO) += ppc476.o
+obj-$(CONFIG_FSP2) += fsp2.o
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
new file mode 100644
index 000000000000..92e98048404f
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -0,0 +1,62 @@
+/*
+ * FSP-2 board specific routines
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+
+static __initdata struct of_device_id fsp2_of_bus[] = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,plb6", },
+ { .compatible = "ibm,opb", },
+ {},
+};
+
+static int __init fsp2_device_probe(void)
+{
+ of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(fsp2, fsp2_device_probe);
+
+static int __init fsp2_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
+ return 0;
+ return 1;
+}
+
+define_machine(fsp2) {
+ .name = "FSP-2",
+ .probe = fsp2_probe,
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+ .calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 895560f4be69..f84d52a2db40 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -115,7 +115,8 @@ static void smp_cell_setup_cpu(int cpu)
static int smp_cell_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
if (!smp_startup_cpu(nr))
return -ENOENT;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index d12ea7b9fd47..3f48f6df1cf3 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -48,6 +48,7 @@ static int pnv_eeh_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
+ int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
pr_warn("%s: OPAL is required !\n",
@@ -69,6 +70,9 @@ static int pnv_eeh_init(void)
if (phb->model == PNV_PHB_MODEL_P7IOC)
eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+ if (phb->diag_data_size > max_diag_size)
+ max_diag_size = phb->diag_data_size;
+
/*
* PE#0 should be regarded as valid by EEH core
* if it's not the reserved one. Currently, we
@@ -82,6 +86,8 @@ static int pnv_eeh_init(void)
break;
}
+ eeh_set_pe_aux_size(max_diag_size);
+
return 0;
}
@@ -540,7 +546,7 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
s64 rc;
rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
- PNV_PCI_DIAG_BUF_SIZE);
+ phb->diag_data_size);
if (rc != OPAL_SUCCESS)
pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
__func__, rc, pe->phb->global_number);
@@ -1314,7 +1320,8 @@ static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
- struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
+ struct OpalIoP7IOCErrorData *data =
+ (struct OpalIoP7IOCErrorData*)phb->diag_data;
long rc;
rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
@@ -1549,10 +1556,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
/* Dump PHB diag-data */
rc = opal_pci_get_phb_diag_data2(phb->opal_id,
- phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+ phb->diag_data, phb->diag_data_size);
if (rc == OPAL_SUCCESS)
pnv_pci_dump_phb_diag_data(hose,
- phb->diag.blob);
+ phb->diag_data);
/* Try best to clear it */
opal_pci_eeh_freeze_clear(phb->opal_id,
@@ -1795,7 +1802,6 @@ static int __init eeh_powernv_init(void)
{
int ret = -EINVAL;
- eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
ret = eeh_ops_register(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 445f30a2c5ef..2abee070373f 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -23,6 +23,7 @@
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
+#include <asm/runlatch.h>
#include "powernv.h"
#include "subcore.h"
@@ -30,8 +31,33 @@
/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE 0xF
+#define P9_STOP_SPR_MSR 2000
+#define P9_STOP_SPR_PSSCR 855
+
static u32 supported_cpuidle_states;
+/*
+ * The default stop state that will be used by the ppc_md.power_save
+ * function on platforms that support the stop instruction.
+ */
+static u64 pnv_default_stop_val;
+static u64 pnv_default_stop_mask;
+static bool default_stop_found;
+
+/*
+ * First deep stop state. Used to figure out when to save/restore
+ * hypervisor context.
+ */
+u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+
+/*
+ * psscr value and mask of the deepest stop idle state.
+ * Used when a cpu is offlined.
+ */
+static u64 pnv_deepest_stop_psscr_val;
+static u64 pnv_deepest_stop_psscr_mask;
+static bool deepest_stop_found;
+
static int pnv_save_sprs_for_deep_states(void)
{
int cpu;
@@ -48,6 +74,8 @@ static int pnv_save_sprs_for_deep_states(void)
uint64_t hid4_val = mfspr(SPRN_HID4);
uint64_t hid5_val = mfspr(SPRN_HID5);
uint64_t hmeer_val = mfspr(SPRN_HMEER);
+ uint64_t msr_val = MSR_IDLE;
+ uint64_t psscr_val = pnv_deepest_stop_psscr_val;
for_each_possible_cpu(cpu) {
uint64_t pir = get_hard_smp_processor_id(cpu);
@@ -61,6 +89,18 @@ static int pnv_save_sprs_for_deep_states(void)
if (rc != 0)
return rc;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
+ if (rc)
+ return rc;
+
+ rc = opal_slw_set_reg(pir,
+ P9_STOP_SPR_PSSCR, psscr_val);
+
+ if (rc)
+ return rc;
+ }
+
/* HIDs are per core registers */
if (cpu_thread_in_core(cpu) == 0) {
@@ -72,17 +112,21 @@ static int pnv_save_sprs_for_deep_states(void)
if (rc != 0)
return rc;
- rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
- if (rc != 0)
- return rc;
+ /* Only P8 needs to set the extra HID registers */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
- rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
- if (rc != 0)
- return rc;
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
- rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
- if (rc != 0)
- return rc;
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
}
}
@@ -96,15 +140,24 @@ static void pnv_alloc_idle_core_states(void)
u32 *core_idle_state;
/*
- * core_idle_state - First 8 bits track the idle state of each thread
- * of the core. The 8th bit is the lock bit. Initially all thread bits
- * are set. They are cleared when the thread enters deep idle state
- * like sleep and winkle. Initially the lock bit is cleared.
- * The lock bit has 2 purposes
- * a. While the first thread is restoring core state, it prevents
- * other threads in the core from switching to process context.
- * b. While the last thread in the core is saving the core state, it
- * prevents a different thread from waking up.
+ * core_idle_state - The lower 8 bits track the idle state of
+ * each thread of the core.
+ *
+ * The most significant bit is the lock bit.
+ *
+ * Initially all the bits corresponding to threads_per_core
+ * are set. They are cleared when the thread enters deep idle
+ * state like sleep and winkle/stop.
+ *
+ * Initially the lock bit is cleared. The lock bit has 2
+ * purposes:
+ * a. While the first thread in the core waking up from
+ * idle is restoring core state, it prevents other
+ * threads in the core from switching to process
+ * context.
+ * b. While the last thread in the core is saving the
+ * core state, it prevents a different thread from
+ * waking up.
*/
for (i = 0; i < nr_cores; i++) {
int first_cpu = i * threads_per_core;
@@ -112,7 +165,7 @@ static void pnv_alloc_idle_core_states(void)
size_t paca_ptr_array_size;
core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+ *core_idle_state = (1 << threads_per_core) - 1;
paca_ptr_array_size = (threads_per_core *
sizeof(struct paca_struct *));
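For illustration, a minimal user-space sketch of the core_idle_state layout described in the comment above; the lock-bit constant here is a stand-in, not the kernel's definition from asm/cpuidle.h:

    #include <stdio.h>
    #include <stdint.h>

    #define CORE_IDLE_LOCK_BIT (1u << 31)	/* MSB lock bit, per the comment */

    int main(void)
    {
    	unsigned int threads_per_core = 8;
    	/* Initial state: all thread bits set, lock bit clear. */
    	uint32_t state = (1u << threads_per_core) - 1;

    	printf("initial:       %#010x\n", (unsigned)state);

    	state &= ~(1u << 3);		/* thread 3 enters deep idle */
    	printf("thread 3 idle: %#010x\n", (unsigned)state);

    	state |= CORE_IDLE_LOCK_BIT;	/* first waker restores core state */
    	printf("locked:        %#010x\n", (unsigned)state);
    	return 0;
    }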
@@ -231,56 +284,104 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
show_fastsleep_workaround_applyonce,
store_fastsleep_workaround_applyonce);
-/*
- * The default stop state that will be used by ppc_md.power_save
- * function on platforms that support stop instruction.
- */
-static u64 pnv_default_stop_val;
-static u64 pnv_default_stop_mask;
-static bool default_stop_found;
+static unsigned long __power7_idle_type(unsigned long type)
+{
+ unsigned long srr1;
-/*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-static void power9_idle(void)
+ if (!prep_irq_for_idle_irqsoff())
+ return 0;
+
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ return srr1;
+}
+
+void power7_idle_type(unsigned long type)
+{
+ unsigned long srr1;
+
+ srr1 = __power7_idle_type(type);
+ irq_set_pending_from_srr1(srr1);
+}
+
+void power7_idle(void)
{
- power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
+ if (!powersave_nap)
+ return;
+
+ power7_idle_type(PNV_THREAD_NAP);
}
-/*
- * First deep stop state. Used to figure out when to save/restore
- * hypervisor context.
- */
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long psscr;
+ unsigned long srr1;
+
+ if (!prep_irq_for_idle_irqsoff())
+ return 0;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ return srr1;
+}
+
+void power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long srr1;
+
+ srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ irq_set_pending_from_srr1(srr1);
+}
/*
- * psscr value and mask of the deepest stop idle state.
- * Used when a cpu is offlined.
+ * Used for ppc_md.power_save which needs a function with no parameters
*/
-static u64 pnv_deepest_stop_psscr_val;
-static u64 pnv_deepest_stop_psscr_mask;
-static bool deepest_stop_found;
+void power9_idle(void)
+{
+ power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+#ifdef CONFIG_HOTPLUG_CPU
/*
* pnv_cpu_offline: A function that puts the CPU into the deepest
* available platform idle state on a CPU-Offline.
+ * Must be called with interrupts hard disabled and no lazy irq pending.
*/
unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
-
u32 idle_states = pnv_get_supported_cpuidle_states();
+ __ppc64_runlatch_off();
+
if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
- srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
- pnv_deepest_stop_psscr_mask);
+ unsigned long psscr;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
+ pnv_deepest_stop_psscr_val;
+ srr1 = power9_idle_stop(psscr);
+
} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
- srr1 = power7_winkle();
+ srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_sleep();
+ srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
} else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_nap(1);
+ srr1 = power7_idle_insn(PNV_THREAD_NAP);
} else {
/* This is the fallback method. We emulate snooze */
while (!generic_check_cpu_restart(cpu)) {
@@ -291,8 +392,11 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
HMT_medium();
}
+ __ppc64_runlatch_on();
+
return srr1;
}
+#endif
/*
* Power ISA 3.0 idle initialization.
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f620572f891f..4ca6c26a56d5 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -99,10 +99,10 @@ opal_return:
lwz r4,8(r1);
ld r5,PPC_LR_STKOFF(r1);
ld r6,PACASAVEDMSR(r13);
- mtspr SPRN_SRR0,r5;
- mtspr SPRN_SRR1,r6;
mtcr r4;
- rfid
+ mtspr SPRN_HSRR0,r5;
+ mtspr SPRN_HSRR1,r6;
+ hrfid
opal_real_call:
mfcr r11
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 283caf1070c9..437613588df1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1718,6 +1718,100 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}
+static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
+{
+ unsigned short vendor = 0;
+ struct pci_dev *pdev;
+
+ if (pe->device_count == 1)
+ return true;
+
+ /* pe->pdev should be set if it's a single device, pe->pbus if not */
+ if (!pe->pbus)
+ return true;
+
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+ if (!vendor) {
+ vendor = pdev->vendor;
+ continue;
+ }
+
+ if (pdev->vendor != vendor)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Reconfigure TVE#0 to be usable as 64-bit DMA space.
+ *
+ * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
+ * Devices can only access more than that if bit 59 of the PCI address is set
+ * by hardware, which indicates TVE#1 should be used instead of TVE#0.
+ * Many PCI devices are not capable of addressing that many bits, and as a
+ * result are limited to the 4GB of virtual memory made available to 32-bit
+ * devices in TVE#0.
+ *
+ * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
+ * devices by mapping all of system memory at a 4GB offset, leaving the first
+ * 4GB of DMA space unmapped. This should only be used by devices that want
+ * more than 4GB, and only on PEs that have no 32-bit devices.
+ *
+ * Currently this will only work on PHB3 (POWER8).
+ */
+static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+{
+ u64 window_size, table_size, tce_count, addr;
+ struct page *table_pages;
+ u64 tce_order = 28; /* 256MB TCEs */
+ __be64 *tces;
+ s64 rc;
+
+ /*
+ * Window size needs to be a power of two, but needs to account for
+ * shifting memory by the 4GB offset required to skip 32-bit space.
+ */
+ window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
+ tce_count = window_size >> tce_order;
+ table_size = tce_count << 3;
+
+ if (table_size < PAGE_SIZE)
+ table_size = PAGE_SIZE;
+
+ table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+ get_order(table_size));
+ if (!table_pages)
+ goto err;
+
+ tces = page_address(table_pages);
+ if (!tces)
+ goto err;
+
+ memset(tces, 0, table_size);
+
+ for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
+ tces[(addr + (1ULL << 32)) >> tce_order] =
+ cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
+ }
+
+ rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
+ pe->pe_number,
+ /* reconfigure window 0 */
+ (pe->pe_number << 1) + 0,
+ 1,
+ __pa(tces),
+ table_size,
+ 1 << tce_order);
+ if (rc == OPAL_SUCCESS) {
+ pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+ return 0;
+ }
+err:
+ pe_err(pe, "Error configuring 64-bit DMA bypass\n");
+ return -EIO;
+}
+
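To make the sizing arithmetic above concrete, a small stand-alone sketch assuming a hypothetical 64GB memory_hotplug_max(); roundup_pow2() here stands in for the kernel's roundup_pow_of_two():

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t roundup_pow2(uint64_t x)
    {
    	uint64_t p = 1;
    	while (p < x)
    		p <<= 1;
    	return p;
    }

    int main(void)
    {
    	uint64_t max_mem = 64ull << 30;		/* assumed memory_hotplug_max() */
    	uint64_t tce_order = 28;		/* 256MB TCEs, as in the patch */
    	uint64_t window = roundup_pow2(max_mem + (1ull << 32));
    	uint64_t tce_count = window >> tce_order;
    	uint64_t table_size = tce_count << 3;	/* 8 bytes per TCE */

    	/* 64GB + 4GB rounds up to a 128GB window: 512 TCEs, 4KB table. */
    	printf("window=%lluGB tces=%llu table=%llu bytes\n",
    	       (unsigned long long)(window >> 30),
    	       (unsigned long long)tce_count,
    	       (unsigned long long)table_size);
    	return 0;
    }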
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -1726,6 +1820,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
+ s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
@@ -1740,8 +1835,27 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(&pdev->dev, &dma_direct_ops);
} else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
+ /*
+ * If the device can't set the TCE bypass bit but still wants
+ * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+ * bypass the 32-bit region and be usable for 64-bit DMAs.
+ * The device needs to be able to address all of this space.
+ */
+ if (dma_mask >> 32 &&
+ dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+ pnv_pci_ioda_pe_single_vendor(pe) &&
+ phb->model == PNV_PHB_MODEL_PHB3) {
+ /* Configure the bypass mode */
+ rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+ if (rc)
+ return rc;
+ /* 4GB offset bypasses 32-bit space */
+ set_dma_offset(&pdev->dev, (1ULL << 32));
+ set_dma_ops(&pdev->dev, &dma_direct_ops);
+ } else {
+ dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+ set_dma_ops(&pdev->dev, &dma_iommu_ops);
+ }
}
*pdev->dev.dma_mask = dma_mask;
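From a driver's perspective no new API is involved; requesting a full 64-bit mask is what steers the platform code above toward TCE bypass, the TVE#0 remap, or the 32-bit iommu path. A hypothetical probe sketch (function name and fallback policy are illustrative):

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    {
    	/* Try the widest mask first; the platform picks the best path. */
    	int rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));

    	if (rc)	/* fall back to 32-bit DMA via the iommu */
    		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
    	return rc;
    }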
@@ -3123,13 +3237,13 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
phb = hose->private_data;
/* Retrieve the diag data from firmware */
- ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
- PNV_PCI_DIAG_BUF_SIZE);
+ ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+ phb->diag_data_size);
if (ret != OPAL_SUCCESS)
return -EIO;
/* Print the diag data to the kernel log */
- pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
return 0;
}
@@ -3725,6 +3839,15 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
+ /* Initialize diagnostic data buffer */
+ prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL);
+ if (prop32)
+ phb->diag_data_size = be32_to_cpup(prop32);
+ else
+ phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
+
+ phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0);
+
/* Parse 32-bit and IO ranges (if any) */
pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 935ccb249a8a..7905d179d036 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -227,11 +227,39 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
}
#endif /* CONFIG_PCI_MSI */
+/* Nicely print the contents of the PE State Tables (PEST). */
+static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
+{
+ __be64 prevA = ULONG_MAX, prevB = ULONG_MAX;
+ bool dup = false;
+ int i;
+
+ for (i = 0; i < pest_size; i++) {
+ __be64 peA = be64_to_cpu(pestA[i]);
+ __be64 peB = be64_to_cpu(pestB[i]);
+
+ if (peA != prevA || peB != prevB) {
+ if (dup) {
+ pr_info("PE[..%03x] A/B: as above\n", i-1);
+ dup = false;
+ }
+ prevA = peA;
+ prevB = peB;
+ if (peA & PNV_IODA_STOPPED_STATE ||
+ peB & PNV_IODA_STOPPED_STATE)
+ pr_info("PE[%03x] A/B: %016llx %016llx\n",
+ i, peA, peB);
+ } else if (!dup && (peA & PNV_IODA_STOPPED_STATE ||
+ peB & PNV_IODA_STOPPED_STATE)) {
+ dup = true;
+ }
+ }
+}
+
static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoP7IOCPhbErrorData *data;
- int i;
data = (struct OpalIoP7IOCPhbErrorData *)common;
pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
@@ -308,22 +336,13 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
- for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
- if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
- (be64_to_cpu(data->pestB[i]) >> 63) == 0)
- continue;
-
- pr_info("PE[%3d] A/B: %016llx %016llx\n",
- i, be64_to_cpu(data->pestA[i]),
- be64_to_cpu(data->pestB[i]));
- }
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS);
}
static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoPhb3ErrorData *data;
- int i;
data = (struct OpalIoPhb3ErrorData*)common;
pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
@@ -404,15 +423,109 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
- for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
- if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
- (be64_to_cpu(data->pestB[i]) >> 63) == 0)
- continue;
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS);
+}
- pr_info("PE[%3d] A/B: %016llx %016llx\n",
- i, be64_to_cpu(data->pestA[i]),
- be64_to_cpu(data->pestB[i]));
- }
+static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
+{
+ struct OpalIoPhb4ErrorData *data;
+
+ data = (struct OpalIoPhb4ErrorData*)common;
+ pr_info("PHB4 PHB#%d Diag-data (Version: %d)\n",
+ hose->global_number, be32_to_cpu(common->version));
+ if (data->brdgCtl)
+ pr_info("brdgCtl: %08x\n",
+ be32_to_cpu(data->brdgCtl));
+ if (data->deviceStatus || data->slotStatus ||
+ data->linkStatus || data->devCmdStatus ||
+ data->devSecStatus)
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(data->deviceStatus),
+ be32_to_cpu(data->slotStatus),
+ be32_to_cpu(data->linkStatus),
+ be32_to_cpu(data->devCmdStatus),
+ be32_to_cpu(data->devSecStatus));
+ if (data->rootErrorStatus || data->uncorrErrorStatus ||
+ data->corrErrorStatus)
+ pr_info("RootErrSts: %08x %08x %08x\n",
+ be32_to_cpu(data->rootErrorStatus),
+ be32_to_cpu(data->uncorrErrorStatus),
+ be32_to_cpu(data->corrErrorStatus));
+ if (data->tlpHdr1 || data->tlpHdr2 ||
+ data->tlpHdr3 || data->tlpHdr4)
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
+ be32_to_cpu(data->tlpHdr1),
+ be32_to_cpu(data->tlpHdr2),
+ be32_to_cpu(data->tlpHdr3),
+ be32_to_cpu(data->tlpHdr4));
+ if (data->sourceId)
+ pr_info("sourceId: %08x\n", be32_to_cpu(data->sourceId));
+ if (data->nFir)
+ pr_info("nFir: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->nFir),
+ be64_to_cpu(data->nFirMask),
+ be64_to_cpu(data->nFirWOF));
+ if (data->phbPlssr || data->phbCsr)
+ pr_info("PhbSts: %016llx %016llx\n",
+ be64_to_cpu(data->phbPlssr),
+ be64_to_cpu(data->phbCsr));
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->lemFir),
+ be64_to_cpu(data->lemErrorMask),
+ be64_to_cpu(data->lemWOF));
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbErrorStatus),
+ be64_to_cpu(data->phbFirstErrorStatus),
+ be64_to_cpu(data->phbErrorLog0),
+ be64_to_cpu(data->phbErrorLog1));
+ if (data->phbTxeErrorStatus)
+ pr_info("PhbTxeErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbTxeErrorStatus),
+ be64_to_cpu(data->phbTxeFirstErrorStatus),
+ be64_to_cpu(data->phbTxeErrorLog0),
+ be64_to_cpu(data->phbTxeErrorLog1));
+ if (data->phbRxeArbErrorStatus)
+ pr_info("RxeArbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeArbErrorStatus),
+ be64_to_cpu(data->phbRxeArbFirstErrorStatus),
+ be64_to_cpu(data->phbRxeArbErrorLog0),
+ be64_to_cpu(data->phbRxeArbErrorLog1));
+ if (data->phbRxeMrgErrorStatus)
+ pr_info("RxeMrgErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeMrgErrorStatus),
+ be64_to_cpu(data->phbRxeMrgFirstErrorStatus),
+ be64_to_cpu(data->phbRxeMrgErrorLog0),
+ be64_to_cpu(data->phbRxeMrgErrorLog1));
+ if (data->phbRxeTceErrorStatus)
+ pr_info("RxeTceErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeTceErrorStatus),
+ be64_to_cpu(data->phbRxeTceFirstErrorStatus),
+ be64_to_cpu(data->phbRxeTceErrorLog0),
+ be64_to_cpu(data->phbRxeTceErrorLog1));
+
+ if (data->phbPblErrorStatus)
+ pr_info("PblErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbPblErrorStatus),
+ be64_to_cpu(data->phbPblFirstErrorStatus),
+ be64_to_cpu(data->phbPblErrorLog0),
+ be64_to_cpu(data->phbPblErrorLog1));
+ if (data->phbPcieDlpErrorStatus)
+ pr_info("PcieDlp: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbPcieDlpErrorLog1),
+ be64_to_cpu(data->phbPcieDlpErrorLog2),
+ be64_to_cpu(data->phbPcieDlpErrorStatus));
+ if (data->phbRegbErrorStatus)
+ pr_info("RegbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRegbErrorStatus),
+ be64_to_cpu(data->phbRegbFirstErrorStatus),
+ be64_to_cpu(data->phbRegbErrorLog0),
+ be64_to_cpu(data->phbRegbErrorLog1));
+
+
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS);
}
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
@@ -431,6 +544,9 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
pnv_pci_dump_phb3_diag_data(hose, common);
break;
+ case OPAL_PHB_ERROR_DATA_TYPE_PHB4:
+ pnv_pci_dump_phb4_diag_data(hose, common);
+ break;
default:
pr_warn("%s: Unrecognized ioType %d\n",
__func__, be32_to_cpu(common->ioType));
@@ -445,8 +561,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
spin_lock_irqsave(&phb->lock, flags);
/* Fetch PHB diag-data */
- rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
- PNV_PCI_DIAG_BUF_SIZE);
+ rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+ phb->diag_data_size);
has_diag = (rc == OPAL_SUCCESS);
/* If PHB supports compound PE, to handle it */
@@ -474,7 +590,7 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
* with the normal errors generated when probing empty slots
*/
if (has_diag && ret)
- pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
spin_unlock_irqrestore(&phb->lock, flags);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 18c8a2fa03b8..f16bc403ec03 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -33,6 +33,9 @@ enum pnv_phb_model {
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
+/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */
+#define PNV_IODA_STOPPED_STATE 0x8000000000000000
+
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
struct pnv_ioda_pe {
@@ -169,13 +172,9 @@ struct pnv_phb {
unsigned int pe_rmap[0x10000];
} ioda;
- /* PHB and hub status structure */
- union {
- unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
- struct OpalIoP7IOCPhbErrorData p7ioc;
- struct OpalIoPhb3ErrorData phb3;
- struct OpalIoP7IOCErrorData hub_diag;
- } diag;
+ /* PHB and hub diagnostics */
+ unsigned int diag_data_size;
+ u8 *diag_data;
/* Nvlink2 data */
struct npu {
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 4aff754b6f2c..40dae96f7e20 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -63,7 +63,8 @@ static int pnv_smp_kick_cpu(int nr)
long rc;
uint8_t status;
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
/*
* If we already started or OPAL is not supported, we just
@@ -144,7 +145,14 @@ static void pnv_smp_cpu_kill_self(void)
unsigned long srr1, wmask;
/* Standard hot unplug procedure */
- local_irq_disable();
+ /*
+ * This hard disables local interrupts, ensuring we have no lazy
+ * irqs pending.
+ */
+ WARN_ON(irqs_disabled());
+ hard_irq_disable();
+ WARN_ON(lazy_irq_pending());
+
idle_task_exit();
current->active_mm = NULL; /* for sanity */
cpu = smp_processor_id();
@@ -162,16 +170,6 @@ static void pnv_smp_cpu_kill_self(void)
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
- /*
- * Hard-disable interrupts, and then clear irq_happened flags
- * that we can safely ignore while off-line, since they
- * are for things for which we do no processing when off-line
- * (or in the case of HMI, all the processing we need to do
- * is done in lower-level real-mode code).
- */
- hard_irq_disable();
- local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
-
while (!generic_check_cpu_restart(cpu)) {
/*
* Clear IPI flag, since we don't handle IPIs while
@@ -182,9 +180,9 @@ static void pnv_smp_cpu_kill_self(void)
*/
kvmppc_set_host_ipi(cpu, 0);
- ppc64_runlatch_off();
srr1 = pnv_cpu_offline(cpu);
- ppc64_runlatch_on();
+
+ WARN_ON(lazy_irq_pending());
/*
* If the SRR1 value indicates that we woke up due to
@@ -198,8 +196,7 @@ static void pnv_smp_cpu_kill_self(void)
* contains 0.
*/
if (((srr1 & wmask) == SRR1_WAKEEE) ||
- ((srr1 & wmask) == SRR1_WAKEHVI) ||
- (local_paca->irq_happened & PACA_IRQ_EE)) {
+ ((srr1 & wmask) == SRR1_WAKEHVI)) {
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (xive_enabled())
xive_flush_interrupt();
@@ -211,14 +208,15 @@ static void pnv_smp_cpu_kill_self(void)
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
}
- local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
smp_mb();
if (cpu_core_split_required())
continue;
if (srr1 && !generic_check_cpu_restart(cpu))
- DBG("CPU%d Unexpected exit while offline !\n", cpu);
+ DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
+ cpu, srr1);
+
}
/* Re-enable decrementer interrupts */
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 309876d699e9..596ae2e98040 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -18,6 +18,7 @@
#include <linux/stop_machine.h>
#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -182,7 +183,7 @@ static void unsplit_core(void)
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
- power7_nap(0);
+ power7_idle_insn(PNV_THREAD_NAP);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 913c54e23eea..3a6dfd14f64b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -124,7 +124,7 @@ config HV_PERF_CTRS
Enable access to hypervisor supplied counters in perf. Currently,
this enables code that uses the hcall GetPerfCounterInfo and 24x7
interfaces to retrieve counters. GPCI exists on Power 6 and later
- systems. 24x7 is available on Power 8 systems.
+ systems. 24x7 is available on Power 8 and later systems.
If unsure, select Y.
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7bc0e91f8715..6afd1efd3633 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -554,7 +554,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
int rc;
- pr_debug("Attemping to remove CPU %s, drc index: %x\n",
+ pr_debug("Attempting to remove CPU %s, drc index: %x\n",
dn->name, drc_index);
rc = dlpar_offline_cpu(dn);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 1fb162ba9d1c..ca9b2f4aaa22 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -22,6 +22,7 @@
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/sparsemem.h>
+#include <asm/fadump.h>
#include "pseries.h"
static bool rtas_hp_event;
@@ -408,6 +409,12 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
phys_addr = lmb->base_addr;
+#ifdef CONFIG_FA_DUMP
+ /* Don't hot-remove memory that falls in fadump boot memory area */
+ if (is_fadump_boot_memory_area(phys_addr, block_sz))
+ return false;
+#endif
+
for (i = 0; i < scns_per_block; i++) {
pfn = PFN_DOWN(phys_addr);
if (!pfn_present(pfn))
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6541d0b03e4c..495ba4e7336d 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -301,7 +301,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
int ssize, unsigned long inv_flags)
{
unsigned long lpar_rc;
- unsigned long flags = (newpp & 7) | H_AVPN;
+ unsigned long flags;
unsigned long want_v;
want_v = hpte_encode_avpn(vpn, psize, ssize);
@@ -309,6 +309,11 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
want_v, slot, flags, psize);
+ flags = (newpp & 7) | H_AVPN;
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ /* Move pp0 into bit 8 (IBM 55) */
+ flags |= (newpp & HPTE_R_PP0) >> 55;
+
lpar_rc = plpar_pte_protect(flags, slot, want_v);
if (lpar_rc == H_NOT_FOUND) {
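As a sketch of the pp0 shift above, assuming HPTE_R_PP0 is the most-significant bit of the HPTE second doubleword (IBM bit 0), shifting right by 55 lands it on IBM bit 55, i.e. value 0x100:

    #include <stdio.h>
    #include <stdint.h>

    #define HPTE_R_PP0 0x8000000000000000ull	/* assumed: IBM bit 0 */

    int main(void)
    {
    	uint64_t newpp = HPTE_R_PP0 | 0x3;	/* pp0 set plus low pp bits */
    	uint64_t flags = (newpp & 7) | ((newpp & HPTE_R_PP0) >> 55);

    	printf("flags = %#llx\n", (unsigned long long)flags);	/* 0x103 */
    	return 0;
    }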
@@ -380,6 +385,10 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
BUG_ON(slot == -1);
flags = newpp & 7;
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ /* Move pp0 into bit 8 (IBM 55) */
+ flags |= (newpp & HPTE_R_PP0) >> 55;
+
lpar_rc = plpar_pte_protect(flags, slot, 0);
BUG_ON(lpar_rc != H_SUCCESS);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 52ca6b311d44..24785f63fb40 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -151,7 +151,8 @@ static void smp_setup_cpu(int cpu)
static int smp_pSeries_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
if (!smp_startup_cpu(nr))
return -ENOENT;
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index 3e828b20c21e..2842f9d63d21 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -79,7 +79,7 @@ unsigned int mpc8xx_get_irq(void)
irq = in_be32(&siu_reg->sc_sivec) >> 26;
if (irq == PIC_VEC_SPURRIOUS)
- irq = 0;
+ return 0;
return irq_linear_revmap(mpc8xx_pic_host, irq);
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 8f5e3035483b..6595462b1fc8 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1417,7 +1417,7 @@ bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
/* Get ready for interrupts */
xive_setup_cpu();
- pr_info("Interrupt handling intialized with %s backend\n",
+ pr_info("Interrupt handling initialized with %s backend\n",
xive_ops->name);
pr_info("Using priority %d for all interrupts\n", max_prio);
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ab9ecce61ee5..0f95476b01f6 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -633,8 +633,8 @@ u32 xive_native_alloc_vp_block(u32 max_vcpus)
if (max_vcpus > (1 << order))
order++;
- pr_info("VP block alloc, for max VCPUs %d use order %d\n",
- max_vcpus, order);
+ pr_debug("VP block alloc, for max VCPUs %d use order %d\n",
+ max_vcpus, order);
for (;;) {
rc = opal_xive_alloc_vp_block(order);
diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
new file mode 100644
index 000000000000..ad9e57209aa4
--- /dev/null
+++ b/arch/powerpc/tools/head_check.sh
@@ -0,0 +1,78 @@
+# Copyright © 2016 IBM Corporation
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks the head of a vmlinux for linker stubs that
+# break our placement of fixed-location code for 64-bit.
+
+# based on relocs_check.pl
+# Copyright © 2009 IBM Corporation
+
+# NOTE!
+#
+# If the build dies here, it's likely that code in head_64.S/exception-64*.S
+# or nearby is branching to labels it can't reach directly, which results in the
+# linker inserting branch stubs. This can move code around in ways that break
+# the fixed section calculations (head-64.h). To debug this, disassemble the
+# vmlinux and look for branch stubs (long_branch, plt_branch, etc.) in the
+# fixed section region (0 - 0x8000ish). Check what code is calling those stubs,
+# and perhaps change it so a direct branch can reach.
+#
+# A ".linker_stub_catch" section is used to catch some stubs generated by
+# early .text code, which tend to get placed at the start of the section.
+# If there are too many such stubs, they can overflow this section. Expanding
+# it may help (or reducing the number of stub branches).
+#
+# Linker stubs use the TOC pointer, so even if fixed section code could
+# tolerate them being inserted into head code, they can't be allowed in low
+# level entry code (boot, interrupt vectors, etc) until r2 is set up. This
+# could cause the kernel to die in early boot.
+
+# Turn this on if you want more debug output:
+# set -x
+
+if [ $# -lt 2 ]; then
+ echo "$0 [path to nm] [path to vmlinux]" 1>&2
+ exit 1
+fi
+
+# Have Kbuild supply the path to nm so we handle cross compilation.
+nm="$1"
+vmlinux="$2"
+
+# gcc-4.6-era toolchains make _stext an A (absolute) symbol rather than T
+$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt
+
+
+vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1)
+
+expected_start_head_addr=$vma
+
+start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1)
+
+if [ "$start_head_addr" != "$expected_start_head_addr" ]; then
+ echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr"
+ echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
+ echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+
+ exit 1
+fi
+
+top_vma=$(echo $vma | cut -d'0' -f1)
+
+expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/")
+
+start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1)
+
+if [ "$start_text_addr" != "$expected_start_text_addr" ]; then
+ echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr"
+ echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
+ echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+
+ exit 1
+fi
+
+rm -f .tmp_symbols.txt
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
new file mode 100755
index 000000000000..1e972df3107e
--- /dev/null
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -0,0 +1,57 @@
+# Copyright © 2016 IBM Corporation
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+# This script checks the relocations of a vmlinux for "suspicious"
+# branches from unrelocated code (head_64.S code).
+
+# Turn this on if you want more debug output:
+# set -x
+
+# Have Kbuild supply the path to objdump so we handle cross compilation.
+objdump="$1"
+vmlinux="$2"
+
+#__end_interrupts should be located within the first 64K
+
+end_intr=0x$(
+"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000 \
+ --stop-address=0xc000000000010000 |
+grep '\<__end_interrupts>:' |
+awk '{print $1}'
+)
+
+BRANCHES=$(
+"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \
+ --stop-address=${end_intr} |
+grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
+grep -v '\<__start_initialization_multiplatform>' |
+grep -v -e 'b.\?.\?ctr' |
+grep -v -e 'b.\?.\?lr' |
+sed 's/://' |
+awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}'
+)
+
+for tuple in $BRANCHES
+do
+ from=`echo $tuple | cut -d':' -f1`
+ branch=`echo $tuple | cut -d':' -f2`
+ to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'`
+ sym=`echo $tuple | cut -d':' -f4`
+
+ if (( $to > $end_intr ))
+ then
+ if [ -z "$bad_branches" ]; then
+ echo "WARNING: Unrelocated relative branches"
+ bad_branches="yes"
+ fi
+ echo "$from $branch-> $to $sym"
+ fi
+done
+
+if [ -z "$bad_branches" ]; then
+ exit 0
+fi
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index f11f65634aab..08e367e3e8c3 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -53,6 +53,7 @@
#include <asm/xive.h>
#include <asm/opal.h>
#include <asm/firmware.h>
+#include <asm/code-patching.h>
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>
@@ -837,7 +838,8 @@ static void insert_bpts(void)
store_inst(&bp->instr[0]);
if (bp->enabled & BP_CIABR)
continue;
- if (mwrite(bp->address, &bpinstr, 4) != 4) {
+ if (patch_instruction((unsigned int *)bp->address,
+ bpinstr) != 0) {
printf("Couldn't write instruction at %lx, "
"disabling breakpoint there\n", bp->address);
bp->enabled &= ~BP_TRAP;
@@ -874,7 +876,8 @@ static void remove_bpts(void)
continue;
if (mread(bp->address, &instr, 4) == 4
&& instr == bpinstr
- && mwrite(bp->address, &bp->instr, 4) != 4)
+ && patch_instruction(
+ (unsigned int *)bp->address, bp->instr[0]) != 0)
printf("Couldn't remove breakpoint at %lx\n",
bp->address);
else
@@ -1242,14 +1245,14 @@ bpt_cmds(void)
{
int cmd;
unsigned long a;
- int mode, i;
+ int i;
struct bpt *bp;
- const char badaddr[] = "Only kernel addresses are permitted "
- "for breakpoints\n";
cmd = inchar();
switch (cmd) {
-#ifndef CONFIG_8xx
+#ifndef CONFIG_PPC_8xx
+ static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+ int mode;
case 'd': /* bd - hardware data breakpoint */
mode = 7;
cmd = inchar();
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7d7e0e811c46..94a18681353d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_ZONE_DEVICE if X86_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 12409a519cc5..37b0698b7193 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -32,18 +32,18 @@ static struct cpuidle_driver powernv_idle_driver = {
.owner = THIS_MODULE,
};
-static int max_idle_state;
-static struct cpuidle_state *cpuidle_state_table;
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
struct stop_psscr_table {
u64 val;
u64 mask;
};
-static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX];
+static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;
-static u64 snooze_timeout;
-static bool snooze_timeout_en;
+static u64 snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
static int snooze_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
@@ -51,21 +51,30 @@ static int snooze_loop(struct cpuidle_device *dev,
{
u64 snooze_exit_time;
- local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
+ local_irq_enable();
+
snooze_exit_time = get_tb() + snooze_timeout;
ppc64_runlatch_off();
HMT_very_low();
while (!need_resched()) {
- if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time)
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
break;
+ }
}
HMT_medium();
ppc64_runlatch_on();
clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
+
return index;
}
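The placement of that barrier matters because the waker checks the polling flag to decide whether an IPI can be skipped. A rough C11-atomics sketch of the pairing, not the kernel's actual implementation:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool polling;		/* stands in for TIF_POLLING_NRFLAG */
    static atomic_bool work_pending;	/* stands in for TIF_NEED_RESCHED */

    /* Idle side: stop polling, then re-check for work that raced in. */
    static bool idle_exit_poll(void)
    {
    	atomic_store(&polling, false);
    	atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() above */
    	return atomic_load(&work_pending);		/* need_resched() test */
    }

    /* Waker side: publish the work, then skip the IPI only if still polling. */
    static bool wake_needs_ipi(void)
    {
    	atomic_store(&work_pending, true);
    	atomic_thread_fence(memory_order_seq_cst);
    	return !atomic_load(&polling);
    }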
@@ -73,9 +82,8 @@ static int nap_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- ppc64_runlatch_off();
- power7_idle();
- ppc64_runlatch_on();
+ power7_idle_type(PNV_THREAD_NAP);
+
return index;
}
@@ -98,7 +106,8 @@ static int fastsleep_loop(struct cpuidle_device *dev,
new_lpcr &= ~LPCR_PECE1;
mtspr(SPRN_LPCR, new_lpcr);
- power7_sleep();
+
+ power7_idle_type(PNV_THREAD_SLEEP);
mtspr(SPRN_LPCR, old_lpcr);
@@ -110,10 +119,8 @@ static int stop_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- ppc64_runlatch_off();
- power9_idle_stop(stop_psscr_table[index].val,
+ power9_idle_type(stop_psscr_table[index].val,
stop_psscr_table[index].mask);
- ppc64_runlatch_on();
return index;
}
@@ -354,6 +361,7 @@ static int powernv_add_idle_states(void)
for (i = 0; i < dt_idle_states; i++) {
unsigned int exit_latency, target_residency;
+ bool stops_timebase = false;
/*
* If an idle state has exit latency beyond
* POWERNV_THRESHOLD_LATENCY_NS then don't use it
@@ -381,6 +389,9 @@ static int powernv_add_idle_states(void)
}
}
+ if (flags[i] & OPAL_PM_TIMEBASE_STOP)
+ stops_timebase = true;
+
/*
* For nap and fastsleep, use default target_residency
* values if f/w does not expose it.
@@ -392,8 +403,7 @@ static int powernv_add_idle_states(void)
add_powernv_state(nr_idle_states, "Nap",
CPUIDLE_FLAG_NONE, nap_loop,
target_residency, exit_latency, 0, 0);
- } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
- !(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
+ } else if (has_stop_states && !stops_timebase) {
add_powernv_state(nr_idle_states, names[i],
CPUIDLE_FLAG_NONE, stop_loop,
target_residency, exit_latency,
@@ -405,8 +415,8 @@ static int powernv_add_idle_states(void)
* within this config dependency check.
*/
#ifdef CONFIG_TICK_ONESHOT
- if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
- flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
+ else if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
+ flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
if (!rc)
target_residency = 300000;
/* Add FASTSLEEP state */
@@ -414,14 +424,15 @@ static int powernv_add_idle_states(void)
CPUIDLE_FLAG_TIMER_STOP,
fastsleep_loop,
target_residency, exit_latency, 0, 0);
- } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
- (flags[i] & OPAL_PM_TIMEBASE_STOP)) {
+ } else if (has_stop_states && stops_timebase) {
add_powernv_state(nr_idle_states, names[i],
CPUIDLE_FLAG_TIMER_STOP, stop_loop,
target_residency, exit_latency,
psscr_val[i], psscr_mask[i]);
}
#endif
+ else
+ continue;
nr_idle_states++;
}
out:
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 166ccd711ec9..e9b3853d93ea 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -25,10 +25,10 @@ struct cpuidle_driver pseries_idle_driver = {
.owner = THIS_MODULE,
};
-static int max_idle_state;
-static struct cpuidle_state *cpuidle_state_table;
-static u64 snooze_timeout;
-static bool snooze_timeout_en;
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
+static u64 snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
static inline void idle_loop_prolog(unsigned long *in_purr)
{
@@ -62,21 +62,29 @@ static int snooze_loop(struct cpuidle_device *dev,
unsigned long in_purr;
u64 snooze_exit_time;
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
idle_loop_prolog(&in_purr);
local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
snooze_exit_time = get_tb() + snooze_timeout;
while (!need_resched()) {
HMT_low();
HMT_very_low();
- if (snooze_timeout_en && get_tb() > snooze_exit_time)
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
break;
+ }
}
HMT_medium();
clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
idle_loop_epilog(in_purr);
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index b75cf830d08a..93397cb05b15 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -11,11 +11,16 @@ config CXL_AFU_DRIVER_OPS
bool
default n
+config CXL_LIB
+ bool
+ default n
+
config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_AFU_DRIVER_OPS
+ select CXL_LIB
default m
help
Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index c14fd6b65b5a..0b5fd749d96d 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror
cxl-y += main.o file.o irq.o fault.o native.o
cxl-y += context.o sysfs.o pci.o trace.o
-cxl-y += vphb.o phb.o api.o
+cxl-y += vphb.o phb.o api.o cxllib.o
cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o
cxl-$(CONFIG_DEBUG_FS) += debugfs.o
obj-$(CONFIG_CXL) += cxl.o
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a03f8e7535e5..b1afeccbb97f 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1010,6 +1010,7 @@ static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct den
void cxl_handle_fault(struct work_struct *work);
void cxl_prefault(struct cxl_context *ctx, u64 wed);
+int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar);
struct cxl *get_cxl_adapter(int num);
int cxl_alloc_sst(struct cxl_context *ctx);
@@ -1061,6 +1062,11 @@ int cxl_afu_slbia(struct cxl_afu *afu);
int cxl_data_cache_flush(struct cxl *adapter);
int cxl_afu_disable(struct cxl_afu *afu);
int cxl_psl_purge(struct cxl_afu *afu);
+int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
+ u32 *phb_index, u64 *capp_unit_id);
+int cxl_slot_is_switched(struct pci_dev *dev);
+int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
new file mode 100644
index 000000000000..5dba23ca2e5f
--- /dev/null
+++ b/drivers/misc/cxl/cxllib.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
+#include <asm/pnv-pci.h>
+#include <misc/cxllib.h>
+
+#include "cxl.h"
+
+#define CXL_INVALID_DRA ~0ull
+#define CXL_DUMMY_READ_SIZE 128
+#define CXL_DUMMY_READ_ALIGN 8
+#define CXL_CAPI_WINDOW_START 0x2000000000000ull
+#define CXL_CAPI_WINDOW_LOG_SIZE 48
+#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1
+
+
+bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
+{
+ int rc;
+ u32 phb_index;
+ u64 chip_id, capp_unit_id;
+
+ /* No flags currently supported */
+ if (flags)
+ return false;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return false;
+
+ if (!cxl_is_power9())
+ return false;
+
+ if (cxl_slot_is_switched(dev))
+ return false;
+
+ /* on p9, some pci slots are not connected to a CAPP unit */
+ rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+ if (rc)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
+
+static DEFINE_MUTEX(dra_mutex);
+static u64 dummy_read_addr = CXL_INVALID_DRA;
+
+static int allocate_dummy_read_buf(void)
+{
+ u64 buf, vaddr;
+ size_t buf_size;
+
+ /*
+ * The dummy read buffer is 128 bytes long, aligned on a
+ * 256-byte boundary, and we need its physical address.
+ */
+ buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
+ buf = (u64) kzalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
+ (~0ull << CXL_DUMMY_READ_ALIGN);
+
+ WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
+ "Dummy read buffer alignment issue");
+ dummy_read_addr = virt_to_phys((void *) vaddr);
+ return 0;
+}
+
+int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
+{
+ int rc;
+ u32 phb_index;
+ u64 chip_id, capp_unit_id;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EINVAL;
+
+ mutex_lock(&dra_mutex);
+ if (dummy_read_addr == CXL_INVALID_DRA) {
+ rc = allocate_dummy_read_buf();
+ if (rc) {
+ mutex_unlock(&dra_mutex);
+ return rc;
+ }
+ }
+ mutex_unlock(&dra_mutex);
+
+ rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+ if (rc)
+ return rc;
+
+ rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+ if (rc)
+ return rc;
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ /* workaround for DD1 - nbwind = capiind */
+ cfg->dsnctl |= ((u64)0x02 << (63-47));
+ }
+
+ cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
+ cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
+ cfg->bar_addr = CXL_CAPI_WINDOW_START;
+ cfg->dra = dummy_read_addr;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
+
+int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
+ unsigned long flags)
+{
+ int rc = 0;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return -EINVAL;
+
+ switch (mode) {
+ case CXL_MODE_PCI:
+ /*
+ * We currently don't support going back to PCI mode.
+ * However, we'll turn the invalidations off, so that
+ * the firmware doesn't have to ack them and can do
+ * things like reset, etc., with no worries.
+ * So always return EPERM (can't go back to PCI) or
+ * EBUSY if we couldn't even turn off snooping.
+ */
+ rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
+ if (rc)
+ rc = -EBUSY;
+ else
+ rc = -EPERM;
+ break;
+ case CXL_MODE_CXL:
+ /* DMA only supported on TVT1 for the time being */
+ if (flags != CXL_MODE_DMA_TVT1)
+ return -EINVAL;
+ rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
+ if (rc)
+ return rc;
+ rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
+
+/*
+ * When switching the PHB to capi mode, the TVT#1 entry for
+ * the Partitionable Endpoint is set in bypass mode, like
+ * in PCI mode.
+ * Configure the device DMA to use TVT#1, which is done
+ * by calling dma_set_mask() with a mask large enough.
+ */
+int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
+{
+ int rc;
+
+ if (flags)
+ return -EINVAL;
+
+ rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
+ return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
+
+int cxllib_get_PE_attributes(struct task_struct *task,
+ unsigned long translation_mode,
+ struct cxllib_pe_attributes *attr)
+{
+ struct mm_struct *mm = NULL;
+
+ if (translation_mode != CXL_TRANSLATED_MODE &&
+ translation_mode != CXL_REAL_MODE)
+ return -EINVAL;
+
+ attr->sr = cxl_calculate_sr(false,
+ task == NULL,
+ translation_mode == CXL_REAL_MODE,
+ true);
+ attr->lpid = mfspr(SPRN_LPID);
+ if (task) {
+ mm = get_task_mm(task);
+ if (mm == NULL)
+ return -EINVAL;
+ /*
+ * Caller is keeping a reference on mm_users for as long
+ * as XSL uses the memory context
+ */
+ attr->pid = mm->context.id;
+ mmput(mm);
+ } else {
+ attr->pid = 0;
+ }
+ attr->tid = 0;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+ int rc;
+ u64 dar;
+ struct vm_area_struct *vma = NULL;
+ unsigned long page_size;
+
+ if (mm == NULL)
+ return -EFAULT;
+
+ down_read(&mm->mmap_sem);
+
+ for (dar = addr; dar < addr + size; dar += page_size) {
+ if (!vma || dar < vma->vm_start || dar > vma->vm_end) {
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ pr_err("Can't find vma for addr %016llx\n", addr);
+ rc = -EFAULT;
+ goto out;
+ }
+ /* get the size of the pages allocated */
+ page_size = vma_kernel_pagesize(vma);
+ }
+
+ rc = cxl_handle_mm_fault(mm, flags, dar);
+ if (rc) {
+ pr_err("cxl_handle_mm_fault failed %d", rc);
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+ rc = 0;
+out:
+ up_read(&mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_handle_fault);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index c79e39bad7a4..6eed7d03e2b5 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -132,18 +132,15 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx,
return IRQ_HANDLED;
}
-static void cxl_handle_page_fault(struct cxl_context *ctx,
- struct mm_struct *mm, u64 dsisr, u64 dar)
+int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
{
unsigned flt = 0;
int result;
unsigned long access, flags, inv_flags = 0;
- trace_cxl_pte_miss(ctx, dsisr, dar);
-
if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
pr_devel("copro_handle_mm_fault failed: %#x\n", result);
- return cxl_ack_ae(ctx);
+ return result;
}
if (!radix_enabled()) {
@@ -155,9 +152,8 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
if (dsisr & CXL_PSL_DSISR_An_S)
access |= _PAGE_WRITE;
- access |= _PAGE_PRIVILEGED;
- if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID))
- access &= ~_PAGE_PRIVILEGED;
+ if (!mm && (REGION_ID(dar) != USER_REGION_ID))
+ access |= _PAGE_PRIVILEGED;
if (dsisr & DSISR_NOHPTE)
inv_flags |= HPTE_NOHPTE_UPDATE;
@@ -166,8 +162,21 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
hash_page_mm(mm, dar, access, 0x300, inv_flags);
local_irq_restore(flags);
}
- pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
- cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+ return 0;
+}
+
+static void cxl_handle_page_fault(struct cxl_context *ctx,
+ struct mm_struct *mm,
+ u64 dsisr, u64 dar)
+{
+ trace_cxl_pte_miss(ctx, dsisr, dar);
+
+ if (cxl_handle_mm_fault(mm, dsisr, dar)) {
+ cxl_ack_ae(ctx);
+ } else {
+ pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
+ cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+ }
}
/*
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
index 7c61c70ba3f6..3aa216bf0939 100644
--- a/drivers/misc/cxl/flash.c
+++ b/drivers/misc/cxl/flash.c
@@ -401,8 +401,10 @@ static int device_open(struct inode *inode, struct file *file)
if (down_interruptible(&sem) != 0)
return -EPERM;
- if (!(adapter = get_cxl_adapter(adapter_num)))
- return -ENODEV;
+ if (!(adapter = get_cxl_adapter(adapter_num))) {
+ rc = -ENODEV;
+ goto err_unlock;
+ }
file->private_data = adapter;
continue_token = 0;
@@ -446,6 +448,8 @@ err1:
free_page((unsigned long) le);
err:
put_device(&adapter->dev);
+err_unlock:
+ up(&sem);
return rc;
}
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 2b2f8894149d..4a82c313cf71 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -586,17 +586,17 @@ err:
#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
#endif
-static u64 calculate_sr(struct cxl_context *ctx)
+u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9)
{
u64 sr = 0;
set_endian(sr);
- if (ctx->master)
+ if (master)
sr |= CXL_PSL_SR_An_MP;
if (mfspr(SPRN_LPCR) & LPCR_TC)
sr |= CXL_PSL_SR_An_TC;
- if (ctx->kernel) {
- if (!ctx->real_mode)
+ if (kernel) {
+ if (!real_mode)
sr |= CXL_PSL_SR_An_R;
sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV;
} else {
@@ -608,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
if (!test_tsk_thread_flag(current, TIF_32BIT))
sr |= CXL_PSL_SR_An_SF;
}
- if (cxl_is_power9()) {
+ if (p9) {
if (radix_enabled())
sr |= CXL_PSL_SR_An_XLAT_ror;
else
@@ -617,6 +617,12 @@ static u64 calculate_sr(struct cxl_context *ctx)
return sr;
}
+static u64 calculate_sr(struct cxl_context *ctx)
+{
+ return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode,
+ cxl_is_power9());
+}
+
static void update_ivtes_directed(struct cxl_context *ctx)
{
bool need_update = (ctx->status == STARTED);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 1eb9859809bf..d18b3d9292fd 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -375,7 +375,7 @@ static u64 get_capp_unit_id(struct device_node *np, u32 phb_index)
return 0;
}
-static int calc_capp_routing(struct pci_dev *dev, u64 *chipid,
+int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
u32 *phb_index, u64 *capp_unit_id)
{
int rc;
@@ -408,17 +408,9 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid,
return 0;
}
-static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci_dev *dev)
+int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
{
- u64 xsl_dsnctl, psl_fircntl;
- u64 chipid;
- u32 phb_index;
- u64 capp_unit_id;
- int rc;
-
- rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
- if (rc)
- return rc;
+ u64 xsl_dsnctl;
/*
* CAPI Identifier bits [0:7]
@@ -454,6 +446,27 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
xsl_dsnctl |= ((u64)0x04 << (63-55));
}
+ *reg = xsl_dsnctl;
+ return 0;
+}
+
+static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
+ struct pci_dev *dev)
+{
+ u64 xsl_dsnctl, psl_fircntl;
+ u64 chipid;
+ u32 phb_index;
+ u64 capp_unit_id;
+ int rc;
+
+ rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
+ if (rc)
+ return rc;
+
+ rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+ if (rc)
+ return rc;
+
cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl);
/* Set fir_cntl to recommended value for production env */
@@ -505,7 +518,7 @@ static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci
u64 capp_unit_id;
int rc;
- rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
+ rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
if (rc)
return rc;
@@ -538,7 +551,7 @@ static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_
u64 capp_unit_id;
int rc;
- rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
+ rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
if (rc)
return rc;
@@ -1897,7 +1910,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
#define CXL_MAX_PCIEX_PARENT 2
-static int cxl_slot_is_switched(struct pci_dev *dev)
+int cxl_slot_is_switched(struct pci_dev *dev)
{
struct device_node *np;
int depth = 0;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 8b9049dac094..e6e31a16f68f 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1688,7 +1688,7 @@ config MEN_A21_WDT
config WATCHDOG_RTAS
tristate "RTAS watchdog"
- depends on PPC_RTAS || (PPC64 && COMPILE_TEST)
+ depends on PPC_RTAS
help
This driver adds watchdog support for the RTAS watchdog.
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f9f56f231ae6..da0be9a8d1de 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -594,6 +594,7 @@
#define SBSS(sbss_align) \
. = ALIGN(sbss_align); \
.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \
+ *(.dynsbss) \
*(.sbss) \
*(.scommon) \
}
@@ -640,11 +641,22 @@
.debug_str 0 : { *(.debug_str) } \
.debug_loc 0 : { *(.debug_loc) } \
.debug_macinfo 0 : { *(.debug_macinfo) } \
+ .debug_pubtypes 0 : { *(.debug_pubtypes) } \
+ /* DWARF 3 */ \
+ .debug_ranges 0 : { *(.debug_ranges) } \
/* SGI/MIPS DWARF 2 extensions */ \
.debug_weaknames 0 : { *(.debug_weaknames) } \
.debug_funcnames 0 : { *(.debug_funcnames) } \
.debug_typenames 0 : { *(.debug_typenames) } \
.debug_varnames 0 : { *(.debug_varnames) } \
+ /* GNU DWARF 2 extensions */ \
+ .debug_gnu_pubnames 0 : { *(.debug_gnu_pubnames) } \
+ .debug_gnu_pubtypes 0 : { *(.debug_gnu_pubtypes) } \
+ /* DWARF 4 */ \
+ .debug_types 0 : { *(.debug_types) } \
+ /* DWARF 5 */ \
+ .debug_macro 0 : { *(.debug_macro) } \
+ .debug_addr 0 : { *(.debug_addr) }
/* Stabs debugging sections. */
#define STABS_DEBUG \
diff --git a/include/linux/processor.h b/include/linux/processor.h
new file mode 100644
index 000000000000..da0c5e56ca02
--- /dev/null
+++ b/include/linux/processor.h
@@ -0,0 +1,70 @@
+/* Misc low level processor primitives */
+#ifndef _LINUX_PROCESSOR_H
+#define _LINUX_PROCESSOR_H
+
+#include <asm/processor.h>
+
+/*
+ * spin_begin is used before beginning a busy-wait loop, and must be paired
+ * with spin_end when the loop is exited. spin_cpu_relax must be called
+ * within the loop.
+ *
+ * The loop body should be as small and fast as possible, on the order of
+ * tens of instructions/cycles as a guide. It should and avoid calling
+ * cpu_relax, or any "spin" or sleep type of primitive including nested uses
+ * of these primitives. It should not lock or take any other resource.
+ * Violations of these guidelines will not cause a bug, but may cause
+ * sub-optimal performance.
+ *
+ * These loops are optimized to be used where wait times are expected to be
+ * less than the cost of a context switch (and associated overhead).
+ *
+ * Detecting the resource owner and deciding whether to spin, sleep, or
+ * guest-yield (e.g., spin lock holder vcpu preempted, or mutex owner not
+ * on CPU) can be done within the loop body.
+ */
+#ifndef spin_begin
+#define spin_begin()
+#endif
+
+#ifndef spin_cpu_relax
+#define spin_cpu_relax() cpu_relax()
+#endif
+
+/*
+ * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
+ * necessary. This should be used if the wait is expected to take longer
+ * than context switch overhead, but we can't sleep or do a directed yield.
+ */
+#ifndef spin_cpu_yield
+#define spin_cpu_yield() cpu_relax_yield()
+#endif
+
+#ifndef spin_end
+#define spin_end()
+#endif
+
+/*
+ * spin_until_cond can be used to wait for a condition to become true. It
+ * may be expected that the first iteration will be true in the common case
+ * (no spinning), so callers should not need a separate "likely" test
+ * for the uncontended case before using this primitive.
+ *
+ * Usage and implementation guidelines are the same as for the spin_begin
+ * primitives, above.
+ */
+#ifndef spin_until_cond
+#define spin_until_cond(cond) \
+do { \
+ if (unlikely(!(cond))) { \
+ spin_begin(); \
+ do { \
+ spin_cpu_relax(); \
+ } while (!(cond)); \
+ spin_end(); \
+ } \
+} while (0)
+
+#endif
+
+#endif /* _LINUX_PROCESSOR_H */
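As a rough usage sketch (not part of the patch): a caller polling a flag set by another CPU can either use spin_until_cond() directly or open-code the same pattern. The wait_for_flag() helper below is hypothetical; on powerpc the spin_begin()/spin_end() hooks presumably map to SMT priority hints, while the defaults above make them no-ops.

	#include <linux/compiler.h>
	#include <linux/processor.h>

	/* Hypothetical caller: busy-wait until another CPU sets *flag. */
	static void wait_for_flag(volatile int *flag)
	{
		/* Same shape as spin_until_cond(*flag), written long-hand: */
		if (unlikely(!*flag)) {
			spin_begin();		/* entering the busy-wait loop */
			do {
				/* keep the body tiny; no locks, no cpu_relax() */
				spin_cpu_relax();
			} while (!*flag);
			spin_end();		/* leaving the busy-wait loop */
		}
	}

The one-line equivalent is simply spin_until_cond(*flag).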
diff --git a/include/misc/cxllib.h b/include/misc/cxllib.h
new file mode 100644
index 000000000000..e5aa29f019a6
--- /dev/null
+++ b/include/misc/cxllib.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _MISC_CXLLIB_H
+#define _MISC_CXLLIB_H
+
+#include <linux/pci.h>
+#include <asm/reg.h>
+
+/*
+ * The cxl driver exports an in-kernel 'library' API which can be called
+ * by other drivers to help interact with an IBM XSL.
+ */
+
+/*
+ * Tells whether capi is supported on the PCIe slot where the
+ * device is seated.
+ *
+ * Input:
+ * dev: device whose slot needs to be checked
+ * flags: 0 for the time being
+ */
+bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags);
+
+
+/*
+ * Returns the configuration parameters to be used by the XSL or device
+ *
+ * Input:
+ * dev: device, used to find PHB
+ * Output:
+ * struct cxllib_xsl_config:
+ * version
+ * capi BAR address, i.e. 0x2000000000000-0x2FFFFFFFFFFFF
+ * capi BAR size
+ * data send control (XSL_DSNCTL)
+ * dummy read address (XSL_DRA)
+ */
+#define CXL_XSL_CONFIG_VERSION1 1
+struct cxllib_xsl_config {
+ u32 version; /* format version for register encoding */
+ u32 log_bar_size; /* log size of the capi window */
+ u64 bar_addr; /* address of the start of capi window */
+ u64 dsnctl; /* matches definition of XSL_DSNCTL */
+ u64 dra; /* real address that can be used for dummy read */
+};
+
+int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg);
+
+
+/*
+ * Activate capi for the pci host bridge associated with the device.
+ * Can be extended to deactivate once we know how to do it.
+ * Device must be ready to accept messages from the CAPP unit and
+ * respond accordingly (TLB invalidates, ...)
+ *
+ * PHB is switched to capi mode through calls to skiboot.
+ * CAPP snooping is activated
+ *
+ * Input:
+ * dev: device whose PHB should switch mode
+ * mode: mode to switch to, i.e. CAPI or PCI
+ * flags: options related to the mode
+ */
+enum cxllib_mode {
+ CXL_MODE_CXL,
+ CXL_MODE_PCI,
+};
+
+#define CXL_MODE_NO_DMA 0
+#define CXL_MODE_DMA_TVT0 1
+#define CXL_MODE_DMA_TVT1 2
+
+int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
+ unsigned long flags);
+
+
+/*
+ * Set the device for capi DMA.
+ * Define its dma_ops and dma offset so that allocations will use TVT#1
+ *
+ * Input:
+ * dev: device to set
+ * flags: options. CXL_MODE_DMA_TVT1 should be used
+ */
+int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags);
+
+
+/*
+ * Get the Process Element structure for the given thread
+ *
+ * Input:
+ * task: task_struct for the context of the translation
+ * translation_mode: whether addresses should be translated
+ * Output:
+ * attr: attributes to fill up the Process Element structure from CAIA
+ */
+struct cxllib_pe_attributes {
+ u64 sr;
+ u32 lpid;
+ u32 tid;
+ u32 pid;
+};
+#define CXL_TRANSLATED_MODE 0
+#define CXL_REAL_MODE 1
+
+int cxllib_get_PE_attributes(struct task_struct *task,
+ unsigned long translation_mode, struct cxllib_pe_attributes *attr);
+
+
+/*
+ * Handle memory fault.
+ * Fault in all the pages of the specified buffer for the permissions
+ * provided in 'flags'.
+ *
+ * Shouldn't be called from interrupt context
+ *
+ * Input:
+ * mm: struct mm for the thread faulting the pages
+ * addr: base address of the buffer to page in
+ * size: size of the buffer to page in
+ * flags: permission requested (DSISR_ISSTORE...)
+ */
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags);
+
+
+#endif /* _MISC_CXLLIB_H */
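A hedged sketch of how a client driver might chain these calls; example_enable_capi() is hypothetical, error handling is abbreviated, and the step that programs the returned configuration into the XSL is device-specific:

	#include <linux/pci.h>
	#include <misc/cxllib.h>

	/* Hypothetical caller: switch the device's PHB to capi mode and
	 * set the device up for DMA through TVT#1, using only the API above.
	 */
	static int example_enable_capi(struct pci_dev *dev)
	{
		struct cxllib_xsl_config cfg;
		int rc;

		if (!cxllib_slot_is_supported(dev, 0))	/* flags: 0 for now */
			return -ENODEV;

		rc = cxllib_get_xsl_config(dev, &cfg);
		if (rc)
			return rc;
		/* ... program cfg.dsnctl, cfg.bar_addr, etc. into the XSL ... */

		rc = cxllib_switch_phb_mode(dev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
		if (rc)
			return rc;

		return cxllib_set_device_dma(dev, CXL_MODE_DMA_TVT1);
	}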
diff --git a/mm/Kconfig b/mm/Kconfig
index 857f6ef368d4..46ef77d5c332 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -669,12 +669,16 @@ config IDLE_PAGE_TRACKING
See Documentation/vm/idle_page_tracking.txt for more details.
+# arch_add_memory() comprehends device memory
+config ARCH_HAS_ZONE_DEVICE
+ bool
+
config ZONE_DEVICE
bool "Device memory (pmem, etc...) hotplug support"
depends on MEMORY_HOTPLUG
depends on MEMORY_HOTREMOVE
depends on SPARSEMEM_VMEMMAP
- depends on X86_64 #arch_add_memory() comprehends device memory
+ depends on ARCH_HAS_ZONE_DEVICE
help
Device memory hotplug support allows for establishing pmem,
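For reference, an architecture whose arch_add_memory() handles device memory opts in by selecting the new symbol from its own Kconfig; a sketch, assuming powerpc's Book3S-64 support as the user (the exact guard is the architecture's choice):

	# Hypothetical arch/powerpc/Kconfig fragment:
	select ARCH_HAS_ZONE_DEVICE		if PPC_BOOK3S_64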
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index 778f5fbfd784..f4241339edd2 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -258,9 +258,14 @@ static unsigned long xchg(unsigned long *p, unsigned long val)
return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST);
}
+static int processes;
+
static int mutex_lock(unsigned long *m)
{
int c;
+ int flags = FUTEX_WAIT;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
c = cmpxchg(m, 0, 1);
if (!c)
@@ -270,7 +275,7 @@ static int mutex_lock(unsigned long *m)
c = xchg(m, 2);
while (c) {
- sys_futex(m, FUTEX_WAIT, 2, NULL, NULL, 0);
+ sys_futex(m, flags, 2, NULL, NULL, 0);
c = xchg(m, 2);
}
@@ -279,12 +284,16 @@ static int mutex_lock(unsigned long *m)
static int mutex_unlock(unsigned long *m)
{
+ int flags = FUTEX_WAKE;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
+
if (*m == 2)
*m = 0;
else if (xchg(m, 0) == 1)
return 0;
- sys_futex(m, FUTEX_WAKE, 1, NULL, NULL, 0);
+ sys_futex(m, flags, 1, NULL, NULL, 0);
return 0;
}
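For reference, FUTEX_PRIVATE_FLAG tells the kernel the futex word is not shared across address spaces, letting it take a faster, process-private path; it is only correct in the threaded case, hence the processes guard above. A minimal sketch of the raw call such a sys_futex() wrapper presumably issues (glibc exposes no futex() function, so syscall(2) is used; the kernel compares a 32-bit word at uaddr):

	#include <linux/futex.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Illustrative wrapper: block while the 32-bit word *uaddr == val. */
	static long futex_wait_private(unsigned int *uaddr, unsigned int val)
	{
		return syscall(SYS_futex, uaddr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
			       val, NULL, NULL, 0);
	}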
@@ -293,26 +302,32 @@ static unsigned long *m1, *m2;
static void futex_setup(int cpu1, int cpu2)
{
- int shmid;
- void *shmaddr;
+ if (!processes) {
+ static unsigned long _m1, _m2;
+ m1 = &_m1;
+ m2 = &_m2;
+ } else {
+ int shmid;
+ void *shmaddr;
- shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
- if (shmid < 0) {
- perror("shmget");
- exit(1);
- }
+ shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
+ if (shmid < 0) {
+ perror("shmget");
+ exit(1);
+ }
- shmaddr = shmat(shmid, NULL, 0);
- if (shmaddr == (char *)-1) {
- perror("shmat");
- shmctl(shmid, IPC_RMID, NULL);
- exit(1);
- }
+ shmaddr = shmat(shmid, NULL, 0);
+ if (shmaddr == (char *)-1) {
+ perror("shmat");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(1);
+ }
- shmctl(shmid, IPC_RMID, NULL);
+ shmctl(shmid, IPC_RMID, NULL);
- m1 = shmaddr;
- m2 = shmaddr + sizeof(*m1);
+ m1 = shmaddr;
+ m2 = shmaddr + sizeof(*m1);
+ }
*m1 = 0;
*m2 = 0;
@@ -352,8 +367,6 @@ static struct actions futex_actions = {
.thread2 = futex_thread2,
};
-static int processes;
-
static struct option options[] = {
{ "test", required_argument, 0, 't' },
{ "process", no_argument, &processes, 1 },