aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm64/cpu-feature-registers.txt18
-rw-r--r--Documentation/arm64/elf_hwcaps.txt160
-rw-r--r--Documentation/arm64/memory.txt10
-rw-r--r--Documentation/arm64/sve.txt508
-rw-r--r--Documentation/devicetree/bindings/arm/spe-pmu.txt20
-rw-r--r--Documentation/perf/hisi-pmu.txt53
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arm/include/asm/arch_timer.h1
-rw-r--r--arch/arm/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/Kconfig18
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/include/asm/arch_timer.h1
-rw-r--r--arch/arm64/include/asm/asm-bug.h8
-rw-r--r--arch/arm64/include/asm/assembler.h51
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/cpu.h4
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h42
-rw-r--r--arch/arm64/include/asm/daifflags.h72
-rw-r--r--arch/arm64/include/asm/elf.h4
-rw-r--r--arch/arm64/include/asm/esr.h3
-rw-r--r--arch/arm64/include/asm/fpsimd.h71
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h148
-rw-r--r--arch/arm64/include/asm/irqflags.h40
-rw-r--r--arch/arm64/include/asm/kvm_arm.h5
-rw-r--r--arch/arm64/include/asm/kvm_host.h11
-rw-r--r--arch/arm64/include/asm/memory.h15
-rw-r--r--arch/arm64/include/asm/pgtable.h14
-rw-r--r--arch/arm64/include/asm/processor.h28
-rw-r--r--arch/arm64/include/asm/sysreg.h121
-rw-r--r--arch/arm64/include/asm/thread_info.h5
-rw-r--r--arch/arm64/include/asm/traps.h8
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h6
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h139
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h120
-rw-r--r--arch/arm64/kernel/Makefile2
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c23
-rw-r--r--arch/arm64/kernel/cpufeature.c204
-rw-r--r--arch/arm64/kernel/cpuinfo.c12
-rw-r--r--arch/arm64/kernel/debug-monitors.c5
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S17
-rw-r--r--arch/arm64/kernel/entry-ftrace.S12
-rw-r--r--arch/arm64/kernel/entry.S128
-rw-r--r--arch/arm64/kernel/fpsimd.c908
-rw-r--r--arch/arm64/kernel/head.S30
-rw-r--r--arch/arm64/kernel/hibernate.c5
-rw-r--r--arch/arm64/kernel/io.c12
-rw-r--r--arch/arm64/kernel/machine_kexec.c4
-rw-r--r--arch/arm64/kernel/process.c64
-rw-r--r--arch/arm64/kernel/ptrace.c280
-rw-r--r--arch/arm64/kernel/setup.c15
-rw-r--r--arch/arm64/kernel/signal.c179
-rw-r--r--arch/arm64/kernel/signal32.c2
-rw-r--r--arch/arm64/kernel/smp.c18
-rw-r--r--arch/arm64/kernel/suspend.c8
-rw-r--r--arch/arm64/kernel/traps.c109
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S2
-rw-r--r--arch/arm64/kvm/handle_exit.c8
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c24
-rw-r--r--arch/arm64/kvm/hyp/switch.c12
-rw-r--r--arch/arm64/kvm/sys_regs.c292
-rw-r--r--arch/arm64/lib/Makefile2
-rw-r--r--arch/arm64/lib/delay.c23
-rw-r--r--arch/arm64/lib/tishift.S80
-rw-r--r--arch/arm64/mm/dma-mapping.c5
-rw-r--r--arch/arm64/mm/fault.c72
-rw-r--r--arch/arm64/mm/proc.S9
-rw-r--r--drivers/acpi/arm64/gtdt.c2
-rw-r--r--drivers/acpi/arm64/iort.c258
-rw-r--r--drivers/bus/arm-ccn.c1
-rw-r--r--drivers/clocksource/arm_arch_timer.c25
-rw-r--r--drivers/perf/Kconfig15
-rw-r--r--drivers/perf/Makefile2
-rw-r--r--drivers/perf/arm_pmu.c10
-rw-r--r--drivers/perf/arm_pmu_acpi.c3
-rw-r--r--drivers/perf/arm_pmu_platform.c4
-rw-r--r--drivers/perf/arm_spe_pmu.c1249
-rw-r--r--drivers/perf/hisilicon/Makefile1
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c463
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c473
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c463
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c447
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h102
-rw-r--r--drivers/perf/qcom_l2_pmu.c54
-rw-r--r--fs/binfmt_elf.c10
-rw-r--r--include/clocksource/arm_arch_timer.h10
-rw-r--r--include/linux/acpi_iort.h4
-rw-r--r--include/linux/cpuhotplug.h3
-rw-r--r--include/linux/irqdesc.h8
-rw-r--r--include/linux/regset.h67
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/linux/perf_event.h1
-rw-r--r--include/uapi/linux/prctl.h9
-rw-r--r--kernel/events/ring_buffer.c4
-rw-r--r--kernel/irq/irqdesc.c1
-rw-r--r--kernel/sys.c12
-rw-r--r--virt/kvm/arm/arm.c3
97 files changed, 7399 insertions, 601 deletions
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index dad411d635d8..bd9b3faab2c4 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,10 +110,20 @@ infrastructure:
x--------------------------------------------------x
| Name | bits | visible |
|--------------------------------------------------|
- | RES0 | [63-32] | n |
+ | RES0 | [63-48] | n |
+ |--------------------------------------------------|
+ | DP | [47-44] | y |
+ |--------------------------------------------------|
+ | SM4 | [43-40] | y |
+ |--------------------------------------------------|
+ | SM3 | [39-36] | y |
+ |--------------------------------------------------|
+ | SHA3 | [35-32] | y |
|--------------------------------------------------|
| RDM | [31-28] | y |
|--------------------------------------------------|
+ | RES0 | [27-24] | n |
+ |--------------------------------------------------|
| ATOMICS | [23-20] | y |
|--------------------------------------------------|
| CRC32 | [19-16] | y |
@@ -132,7 +142,11 @@ infrastructure:
x--------------------------------------------------x
| Name | bits | visible |
|--------------------------------------------------|
- | RES0 | [63-28] | n |
+ | RES0 | [63-36] | n |
+ |--------------------------------------------------|
+ | SVE | [35-32] | y |
+ |--------------------------------------------------|
+ | RES0 | [31-28] | n |
|--------------------------------------------------|
| GIC | [27-24] | n |
|--------------------------------------------------|
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
new file mode 100644
index 000000000000..89edba12a9e0
--- /dev/null
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -0,0 +1,160 @@
+ARM64 ELF hwcaps
+================
+
+This document describes the usage and semantics of the arm64 ELF hwcaps.
+
+
+1. Introduction
+---------------
+
+Some hardware or software features are only available on some CPU
+implementations, and/or with certain kernel configurations, but have no
+architected discovery mechanism available to userspace code at EL0. The
+kernel exposes the presence of these features to userspace through a set
+of flags called hwcaps, exposed in the auxilliary vector.
+
+Userspace software can test for features by acquiring the AT_HWCAP entry
+of the auxilliary vector, and testing whether the relevant flags are
+set, e.g.
+
+bool floating_point_is_present(void)
+{
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+ if (hwcaps & HWCAP_FP)
+ return true;
+
+ return false;
+}
+
+Where software relies on a feature described by a hwcap, it should check
+the relevant hwcap flag to verify that the feature is present before
+attempting to make use of the feature.
+
+Features cannot be probed reliably through other means. When a feature
+is not available, attempting to use it may result in unpredictable
+behaviour, and is not guaranteed to result in any reliable indication
+that the feature is unavailable, such as a SIGILL.
+
+
+2. Interpretation of hwcaps
+---------------------------
+
+The majority of hwcaps are intended to indicate the presence of features
+which are described by architected ID registers inaccessible to
+userspace code at EL0. These hwcaps are defined in terms of ID register
+fields, and should be interpreted with reference to the definition of
+these fields in the ARM Architecture Reference Manual (ARM ARM).
+
+Such hwcaps are described below in the form:
+
+ Functionality implied by idreg.field == val.
+
+Such hwcaps indicate the availability of functionality that the ARM ARM
+defines as being present when idreg.field has value val, but do not
+indicate that idreg.field is precisely equal to val, nor do they
+indicate the absence of functionality implied by other values of
+idreg.field.
+
+Other hwcaps may indicate the presence of features which cannot be
+described by ID registers alone. These may be described without
+reference to ID registers, and may refer to other documentation.
+
+
+3. The hwcaps exposed in AT_HWCAP
+---------------------------------
+
+HWCAP_FP
+
+ Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
+
+HWCAP_ASIMD
+
+ Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
+
+HWCAP_EVTSTRM
+
+ The generic timer is configured to generate events at a frequency of
+ approximately 100KHz.
+
+HWCAP_AES
+
+ Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0001.
+
+HWCAP_PMULL
+
+ Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0010.
+
+HWCAP_SHA1
+
+ Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
+
+HWCAP_SHA2
+
+ Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
+
+HWCAP_CRC32
+
+ Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
+
+HWCAP_ATOMICS
+
+ Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
+
+HWCAP_FPHP
+
+ Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
+
+HWCAP_ASIMDHP
+
+ Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
+
+HWCAP_CPUID
+
+ EL0 access to certain ID registers is available, to the extent
+ described by Documentation/arm64/cpu-feature-registers.txt.
+
+ These ID registers may imply the availability of features.
+
+HWCAP_ASIMDRDM
+
+ Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
+
+HWCAP_JSCVT
+
+ Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
+
+HWCAP_FCMA
+
+ Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
+
+HWCAP_LRCPC
+
+ Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
+
+HWCAP_DCPOP
+
+ Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
+
+HWCAP_SHA3
+
+ Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
+
+HWCAP_SM3
+
+ Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
+
+HWCAP_SM4
+
+ Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
+
+HWCAP_ASIMDDP
+
+ Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
+
+HWCAP_SHA512
+
+ Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0002.
+
+HWCAP_SVE
+
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index d7273a5f6456..671bc0639262 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -86,9 +86,9 @@ Translation table lookup with 64KB pages:
+-------------------------------------------------> [63] TTBR0/1
-When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
-offset from the kernel VA (top 24bits of the kernel VA set to zero):
+When using KVM without the Virtualization Host Extensions, the hypervisor
+maps kernel pages in EL2 at a fixed offset from the kernel VA. See the
+kern_hyp_va macro for more details.
-Start End Size Use
------------------------------------------------------------------------
-0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP
+When using KVM with the Virtualization Host Extensions, no additional
+mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
new file mode 100644
index 000000000000..f128f736b4a5
--- /dev/null
+++ b/Documentation/arm64/sve.txt
@@ -0,0 +1,508 @@
+ Scalable Vector Extension support for AArch64 Linux
+ ===================================================
+
+Author: Dave Martin <Dave.Martin@arm.com>
+Date: 4 August 2017
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Vector Extension (SVE).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+This document does not aim to describe the SVE architecture or programmer's
+model. To aid understanding, a minimal description of relevant programmer's
+model features for SVE is included in Appendix A.
+
+
+1. General
+-----------
+
+* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
+ tracked per-thread.
+
+* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
+ AT_HWCAP entry. Presence of this flag implies the presence of the SVE
+ instructions and registers, and the Linux-specific system interfaces
+ described in this document. SVE is reported in /proc/cpuinfo as "sve".
+
+* Support for the execution of SVE instructions in userspace can also be
+ detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
+ instruction, and checking that the value of the SVE field is nonzero. [3]
+
+ It does not guarantee the presence of the system interfaces described in the
+ following sections: software that needs to verify that those interfaces are
+ present must check for HWCAP_SVE instead.
+
+* Debuggers should restrict themselves to interacting with the target via the
+ NT_ARM_SVE regset. The recommended way of detecting support for this regset
+ is to connect to a target process first and then attempt a
+ ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+
+
+2. Vector length terminology
+-----------------------------
+
+The size of an SVE vector (Z) register is referred to as the "vector length".
+
+To avoid confusion about the units used to express vector length, the kernel
+adopts the following conventions:
+
+* Vector length (VL) = size of a Z-register in bytes
+
+* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
+
+(So, VL = 16 * VQ.)
+
+The VQ convention is used where the underlying granularity is important, such
+as in data structure definitions. In most other situations, the VL convention
+is used. This is consistent with the meaning of the "VL" pseudo-register in
+the SVE instruction set architecture.
+
+
+3. System call behaviour
+-------------------------
+
+* On syscall, V0..V31 are preserved (as without SVE). Thus, bits [127:0] of
+ Z0..Z31 are preserved. All other bits of Z0..Z31, and all of P0..P15 and FFR
+ become unspecified on return from a syscall.
+
+* The SVE registers are not used to pass arguments to or receive results from
+ any syscall.
+
+* In practice the affected registers/bits will be preserved or will be replaced
+ with zeros on return from a syscall, but userspace should not make
+ assumptions about this. The kernel behaviour may vary on a case-by-case
+ basis.
+
+* All other SVE state of a thread, including the currently configured vector
+ length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
+ length (if any), is preserved across all syscalls, subject to the specific
+ exceptions for execve() described in section 6.
+
+ In particular, on return from a fork() or clone(), the parent and new child
+ process or thread share identical SVE configuration, matching that of the
+ parent before the call.
+
+
+4. Signal handling
+-------------------
+
+* A new signal frame record sve_context encodes the SVE registers on signal
+ delivery. [1]
+
+* This record is supplementary to fpsimd_context. The FPSR and FPCR registers
+ are only present in fpsimd_context. For convenience, the content of V0..V31
+ is duplicated between sve_context and fpsimd_context.
+
+* The signal frame record for SVE always contains basic metadata, in particular
+ the thread's vector length (in sve_context.vl).
+
+* The SVE registers may or may not be included in the record, depending on
+ whether the registers are live for the thread. The registers are present if
+ and only if:
+ sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
+
+* If the registers are present, the remainder of the record has a vl-dependent
+ size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to
+ the members.
+
+* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
+ space is allocated on the stack, an extra_context record is written in
+ __reserved[] referencing this space. sve_context is then written in the
+ extra space. Refer to [1] for further details about this mechanism.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no sve_context record in the signal frame, or if the record is
+ present but contains no register data as desribed in the previous section,
+ then the SVE registers/bits become non-live and take unspecified values.
+
+* If sve_context is present in the signal frame and contains full register
+ data, the SVE registers become live and are populated with the specified
+ data. However, for backward compatibility reasons, bits [127:0] of Z0..Z31
+ are always restored from the corresponding members of fpsimd_context.vregs[]
+ and not from sve_context. The remaining bits are restored from sve_context.
+
+* Inclusion of fpsimd_context in the signal frame remains mandatory,
+ irrespective of whether sve_context is present or not.
+
+* The vector length cannot be changed via signal return. If sve_context.vl in
+ the signal frame does not match the current vector length, the signal return
+ attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6. prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SVE vector
+length:
+
+prctl(PR_SVE_SET_VL, unsigned long arg)
+
+ Sets the vector length of the calling thread and related flags, where
+ arg == vl | flags. Other threads of the calling process are unaffected.
+
+ vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+ flags:
+
+ PR_SVE_SET_VL_INHERIT
+
+ Inherit the current vector length across execve(). Otherwise, the
+ vector length is reset to the system default at execve(). (See
+ Section 9.)
+
+ PR_SVE_SET_VL_ONEXEC
+
+ Defer the requested vector length change until the next execve()
+ performed by this thread.
+
+ The effect is equivalent to implicit exceution of the following
+ call immediately after the next execve() (if any) by the thread:
+
+ prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
+
+ This allows launching of a new program with a different vector
+ length, while avoiding runtime side effects in the caller.
+
+
+ Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
+ immediately.
+
+
+ Return value: a nonnegative on success, or a negative value on error:
+ EINVAL: SVE not supported, invalid vector length requested, or
+ invalid flags.
+
+
+ On success:
+
+ * Either the calling thread's vector length or the deferred vector length
+ to be applied at the next execve() by the thread (dependent on whether
+ PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
+ supported by the system that is less than or equal to vl. If vl ==
+ SVE_VL_MAX, the value set will be the largest value supported by the
+ system.
+
+ * Any previously outstanding deferred vector length change in the calling
+ thread is cancelled.
+
+ * The returned value describes the resulting configuration, encoded as for
+ PR_SVE_GET_VL. The vector length reported in this value is the new
+ current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
+ present in arg; otherwise, the reported vector length is the deferred
+ vector length that will be applied at the next execve() by the calling
+ thread.
+
+ * Changing the vector length causes all of P0..P15, FFR and all bits of
+ Z0..V31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+ unspecified. Calling PR_SVE_SET_VL with vl equal to the thread's current
+ vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
+ flag, does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SVE_GET_VL)
+
+ Gets the vector length of the calling thread.
+
+ The following flag may be OR-ed into the result:
+
+ PR_SVE_SET_VL_INHERIT
+
+ Vector length will be inherited across execve().
+
+ There is no way to determine whether there is an outstanding deferred
+ vector length change (which would only normally be the case between a
+ fork() or vfork() and the corresponding execve() in typical use).
+
+ To extract the vector length from the result, and it with
+ PR_SVE_VL_LEN_MASK.
+
+ Return value: a nonnegative value on success, or a negative value on error:
+ EINVAL: SVE not supported.
+
+
+7. ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
+ PTRACE_SETREGSET.
+
+ Refer to [2] for definitions.
+
+The regset data starts with struct user_sve_header, containing:
+
+ size
+
+ Size of the complete regset, in bytes.
+ This depends on vl and possibly on other things in the future.
+
+ If a call to PTRACE_GETREGSET requests less data than the value of
+ size, the caller can allocate a larger buffer and retry in order to
+ read the complete regset.
+
+ max_size
+
+ Maximum size in bytes that the regset can grow to for the target
+ thread. The regset won't grow bigger than this even if the target
+ thread changes its vector length etc.
+
+ vl
+
+ Target thread's current vector length, in bytes.
+
+ max_vl
+
+ Maximum possible vector length for the target thread.
+
+ flags
+
+ either
+
+ SVE_PT_REGS_FPSIMD
+
+ SVE registers are not live (GETREGSET) or are to be made
+ non-live (SETREGSET).
+
+ The payload is of type struct user_fpsimd_state, with the same
+ meaning as for NT_PRFPREG, starting at offset
+ SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
+
+ Extra data might be appended in the future: the size of the
+ payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
+
+ vq should be obtained using sve_vq_from_vl(vl).
+
+ or
+
+ SVE_PT_REGS_SVE
+
+ SVE registers are live (GETREGSET) or are to be made live
+ (SETREGSET).
+
+ The payload contains the SVE register data, starting at offset
+ SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
+ size SVE_PT_SVE_SIZE(vq, flags);
+
+ ... OR-ed with zero or more of the following flags, which have the same
+ meaning and behaviour as the corresponding PR_SET_VL_* flags:
+
+ SVE_PT_VL_INHERIT
+
+ SVE_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+ those documented for PR_SVE_SET_VL.
+
+ The caller must make a further GETREGSET call if it needs to know what VL is
+ actually set by SETREGSET, unless is it known in advance that the requested
+ VL is supported.
+
+* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
+ the header fields. The SVE_PT_SVE_*() macros are provided to facilitate
+ access to the members.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+ case only the vector length and flags are changed (along with any
+ consequences of those changes).
+
+* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
+ requested VL is not supported, the effect will be the same as if the
+ payload were omitted, except that an EIO error is reported. No
+ attempt is made to translate the payload data to the correct layout
+ for the vector length actually set. The thread's FPSIMD state is
+ preserved, but the remaining bits of the SVE registers become
+ unspecified. It is up to the caller to translate the payload layout
+ for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8. ELF coredump extensions
+---------------------------
+
+* A NT_ARM_SVE note will be added to each coredump for each thread of the
+ dumped process. The contents will be equivalent to the data that would have
+ been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
+ when the coredump was generated.
+
+
+9. System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+ mechanism is provided for administrators, distro maintainers and developers
+ to set the default vector length for userspace processes:
+
+/proc/sys/abi/sve_default_vector_length
+
+ Writing the text representation of an integer to this file sets the system
+ default vector length to the specified value, unless the value is greater
+ than the maximum vector length supported by the system in which case the
+ default vector length is set to that maximum.
+
+ The result can be determined by reopening the file and reading its
+ contents.
+
+ At boot, the default vector length is initially set to 64 or the maximum
+ supported vector length, whichever is smaller. This determines the initial
+ vector length of the init process (PID 1).
+
+ Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+ the system default vector length, unless
+
+ * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
+ calling thread, or
+
+ * a deferred vector length change is pending, established via the
+ PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+ of any existing process or thread that does not make an execve() call.
+
+
+Appendix A. SVE programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1. Registers
+---------------
+
+In A64 state, SVE adds the following:
+
+* 32 8VL-bit vector registers Z0..Z31
+ For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
+
+ A register write using a Vn register name zeros all bits of the corresponding
+ Zn except for bits [127:0].
+
+* 16 VL-bit predicate registers P0..P15
+
+* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
+
+* a VL "pseudo-register" that determines the size of each vector register
+
+ The SVE instruction set architecture provides no way to write VL directly.
+ Instead, it can be modified only by EL1 and above, by writing appropriate
+ system registers.
+
+* The value of VL can be configured at runtime by EL1 and above:
+ 16 <= VL <= VLmax, where VL must be a multiple of 16.
+
+* The maximum vector length is determined by the hardware:
+ 16 <= VLmax <= 256.
+
+ (The SVE architecture specifies 256, but permits future architecture
+ revisions to raise this limit.)
+
+* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
+ operations in a similar way to the way in which they interact with ARMv8
+ floating-point operations.
+
+ 8VL-1 128 0 bit index
+ +---- //// -----------------+
+ Z0 | : V0 |
+ : :
+ Z7 | : V7 |
+ Z8 | : * V8 |
+ : : :
+ Z15 | : *V15 |
+ Z16 | : V16 |
+ : :
+ Z31 | : V31 |
+ +---- //// -----------------+
+ 31 0
+ VL-1 0 +-------+
+ +---- //// --+ FPSR | |
+ P0 | | +-------+
+ : | | *FPCR | |
+ P15 | | +-------+
+ +---- //// --+
+ FFR | | +-----+
+ +---- //// --+ VL | |
+ +-----+
+
+(*) callee-save:
+ This only applies to bits [63:0] of Z-/V-registers.
+ FPCR contains callee-save and caller-save bits. See [4] for details.
+
+
+A.2. Procedure call standard
+-----------------------------
+
+The ARMv8-A base procedure call standard is extended as follows with respect to
+the additional SVE register state:
+
+* All SVE register bits that are not shared with FP/SIMD are caller-save.
+
+* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
+
+ This follows from the way these bits are mapped to V8..V15, which are caller-
+ save in the base procedure call standard.
+
+
+Appendix B. ARMv8-A FP/SIMD programmer's model
+===============================================
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+Refer to [4] for for more information.
+
+ARMv8-A defines the following floating-point / SIMD register state:
+
+* 32 128-bit vector registers V0..V31
+* 2 32-bit status/control registers FPSR, FPCR
+
+ 127 0 bit index
+ +---------------+
+ V0 | |
+ : : :
+ V7 | |
+ * V8 | |
+ : : : :
+ *V15 | |
+ V16 | |
+ : : :
+ V31 | |
+ +---------------+
+
+ 31 0
+ +-------+
+ FPSR | |
+ +-------+
+ *FPCR | |
+ +-------+
+
+(*) callee-save:
+ This only applies to bits [63:0] of V-registers.
+ FPCR contains a mixture of callee-save and caller-save bits.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+ AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+ AArch64 Linux ptrace ABI definitions
+
+[3] linux/Documentation/arm64/cpu-feature-registers.txt
+
+[4] ARM IHI0055C
+ http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
+ http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
+ Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
diff --git a/Documentation/devicetree/bindings/arm/spe-pmu.txt b/Documentation/devicetree/bindings/arm/spe-pmu.txt
new file mode 100644
index 000000000000..93372f2a7df9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spe-pmu.txt
@@ -0,0 +1,20 @@
+* ARMv8.2 Statistical Profiling Extension (SPE) Performance Monitor Units (PMU)
+
+ARMv8.2 introduces the optional Statistical Profiling Extension for collecting
+performance sample data using an in-memory trace buffer.
+
+** SPE Required properties:
+
+- compatible : should be one of:
+ "arm,statistical-profiling-extension-v1"
+
+- interrupts : Exactly 1 PPI must be listed. For heterogeneous systems where
+ SPE is only supported on a subset of the CPUs, please consult
+ the arm,gic-v3 binding for details on describing a PPI partition.
+
+** Example:
+
+spe-pmu {
+ compatible = "arm,statistical-profiling-extension-v1";
+ interrupts = <GIC_PPI 05 IRQ_TYPE_LEVEL_HIGH &part1>;
+};
diff --git a/Documentation/perf/hisi-pmu.txt b/Documentation/perf/hisi-pmu.txt
new file mode 100644
index 000000000000..267a028b2741
--- /dev/null
+++ b/Documentation/perf/hisi-pmu.txt
@@ -0,0 +1,53 @@
+HiSilicon SoC uncore Performance Monitoring Unit (PMU)
+======================================================
+The HiSilicon SoC chip includes various independent system device PMUs
+such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
+independent and have hardware logic to gather statistics and performance
+information.
+
+The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
+(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
+called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
+two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
+
+HiSilicon SoC uncore PMU driver
+---------------------------------------
+Each device PMU has separate registers for event counting, control and
+interrupt, and the PMU driver shall register perf PMU drivers like L3C,
+HHA and DDRC etc. The available events and configuration options shall
+be described in the sysfs, see :
+/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
+The "perf list" command shall list the available events from sysfs.
+
+Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
+name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>.
+where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
+module.
+e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
+SCCL ID #3.
+e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
+SCCL ID #1.
+
+The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
+ID used to count the uncore PMU event.
+
+Example usage of perf:
+$# perf list
+hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+
+$# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+$# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Also attach to a task is unsupported as the events are all uncore.
+
+Note: Please contact the maintainer for a complete list of events supported for
+the PMU devices in the SoC and its information if needed.
diff --git a/MAINTAINERS b/MAINTAINERS
index 82ed85135971..7e9c887ad951 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6259,6 +6259,13 @@ S: Maintained
F: drivers/net/ethernet/hisilicon/
F: Documentation/devicetree/bindings/net/hisilicon*.txt
+HISILICON PMU DRIVER
+M: Shaokun Zhang <zhangshaokun@hisilicon.com>
+W: http://www.hisilicon.com
+S: Supported
+F: drivers/perf/hisilicon
+F: Documentation/perf/hisi-pmu.txt
+
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 9327e3a101dc..0a8d7bba2cb0 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -107,6 +107,7 @@ static inline u32 arch_timer_get_cntkctl(void)
static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
+ isb();
}
#endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4a879f6ff13b..242151ea6908 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -293,4 +293,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
+static inline void kvm_fpsimd_flush_cpu_state(void) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b907bf4403b5..ba6aab55d464 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,7 +21,7 @@ config ARM64
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK if !PREEMPT
select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
@@ -115,7 +115,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
- select HAVE_NMI if ACPI_APEI_SEA
+ select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -136,6 +136,7 @@ config ARM64
select PCI_ECAM if ACPI
select POWER_RESET
select POWER_SUPPLY
+ select REFCOUNT_FULL
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
@@ -842,6 +843,7 @@ config FORCE_MAX_ZONEORDER
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
+ depends on SYSCTL
help
Legacy software support may require certain instructions
that have been deprecated or obsoleted in the architecture.
@@ -1011,6 +1013,17 @@ config ARM64_PMEM
endmenu
+config ARM64_SVE
+ bool "ARM Scalable Vector Extension support"
+ default y
+ help
+ The Scalable Vector Extension (SVE) is an extension to the AArch64
+ execution state which complements and extends the SIMD functionality
+ of the base architecture to support much larger vectors and to enable
+ additional vectorisation opportunities.
+
+ To enable use of this extension on CPUs that implement it, say Y.
+
config ARM64_MODULE_CMODEL_LARGE
bool
@@ -1099,6 +1112,7 @@ config EFI_STUB
config EFI
bool "UEFI runtime support"
depends on OF && !CPU_BIG_ENDIAN
+ depends on KERNEL_MODE_NEON
select LIBFDT
select UCS2_STRING
select EFI_PARAMS_FROM_FDT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 939b310913cf..b35788c909f1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -14,8 +14,12 @@ LDFLAGS_vmlinux :=-p --no-undefined -X
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
GZFLAGS :=-9
-ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux += -pie -shared -Bsymbolic
+ifeq ($(CONFIG_RELOCATABLE), y)
+# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+# for relative relocs, since this leads to better Image compression
+# with the relocation offsets always being zero.
+LDFLAGS_vmlinux += -pie -shared -Bsymbolic \
+ $(call ld-option, --no-apply-dynamic-relocs)
endif
ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
@@ -53,6 +57,8 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
+KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index a652ce0a5cb2..bdedd8f748d1 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -144,6 +144,7 @@ static inline u32 arch_timer_get_cntkctl(void)
static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
write_sysreg(cntkctl, cntkctl_el1);
+ isb();
}
static inline u64 arch_counter_get_cntpct(void)
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index 636e755bcdca..b3552c4a405f 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -22,10 +22,10 @@
#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
#define __BUGVERBOSE_LOCATION(file, line) \
.pushsection .rodata.str,"aMS",@progbits,1; \
- 2: .string file; \
+ 14472: .string file; \
.popsection; \
\
- .long 2b - 0b; \
+ .long 14472b - 14470b; \
.short line;
#else
#define _BUGVERBOSE_LOCATION(file, line)
@@ -36,11 +36,11 @@
#define __BUG_ENTRY(flags) \
.pushsection __bug_table,"aw"; \
.align 2; \
- 0: .long 1f - 0b; \
+ 14470: .long 14471f - 14470b; \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
.short flags; \
.popsection; \
- 1:
+ 14471:
#else
#define __BUG_ENTRY(flags)
#endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d58a6253c6ab..aef72d886677 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,12 +25,41 @@
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
+#include <asm/debug-monitors.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
+ .macro save_and_disable_daif, flags
+ mrs \flags, daif
+ msr daifset, #0xf
+ .endm
+
+ .macro disable_daif
+ msr daifset, #0xf
+ .endm
+
+ .macro enable_daif
+ msr daifclr, #0xf
+ .endm
+
+ .macro restore_daif, flags:req
+ msr daif, \flags
+ .endm
+
+ /* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
+ .macro inherit_daif, pstate:req, tmp:req
+ and \tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+ msr daif, \tmp
+ .endm
+
+ /* IRQ is the lowest priority flag, unconditionally unmask the rest. */
+ .macro enable_da_f
+ msr daifclr, #(8 | 4 | 1)
+ .endm
+
/*
* Enable and disable interrupts.
*/
@@ -51,13 +80,6 @@
msr daif, \flags
.endm
-/*
- * Enable and disable debug exceptions.
- */
- .macro disable_dbg
- msr daifset, #8
- .endm
-
.macro enable_dbg
msr daifclr, #8
.endm
@@ -65,31 +87,22 @@
.macro disable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
- bic \tmp, \tmp, #1
+ bic \tmp, \tmp, #DBG_MDSCR_SS
msr mdscr_el1, \tmp
isb // Synchronise with enable_dbg
9990:
.endm
+ /* call with daif masked */
.macro enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
- disable_dbg
mrs \tmp, mdscr_el1
- orr \tmp, \tmp, #1
+ orr \tmp, \tmp, #DBG_MDSCR_SS
msr mdscr_el1, \tmp
9990:
.endm
/*
- * Enable both debug exceptions and interrupts. This is likely to be
- * faster than two daifclr operations, since writes to this register
- * are self-synchronising.
- */
- .macro enable_dbg_and_irq
- msr daifclr, #(8 | 2)
- .endm
-
-/*
* SMP data memory barrier
*/
.macro smp_dmb, opt
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fe7e43b7fbc..77651c49ef44 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -31,6 +31,8 @@
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+#define psb_csync() asm volatile("hint #17" : : : "memory")
+
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 889226b4c6e1..88392272250e 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -41,6 +41,7 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64mmfr2;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64zfr0;
u32 reg_id_dfr0;
u32 reg_id_isar0;
@@ -59,6 +60,9 @@ struct cpuinfo_arm64 {
u32 reg_mvfr0;
u32 reg_mvfr1;
u32 reg_mvfr2;
+
+ /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
+ u64 reg_zcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8da621627d7c..2ff7c5e8efab 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -40,7 +40,8 @@
#define ARM64_WORKAROUND_858921 19
#define ARM64_WORKAROUND_CAVIUM_30115 20
#define ARM64_HAS_DCPOP 21
+#define ARM64_SVE 22
-#define ARM64_NCAPS 22
+#define ARM64_NCAPS 23
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 428ee1f2468c..ac67cfc2585a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,7 +10,9 @@
#define __ASM_CPUFEATURE_H
#include <asm/cpucaps.h>
+#include <asm/fpsimd.h>
#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
#include <asm/sysreg.h>
/*
@@ -223,6 +225,13 @@ static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
return val == ID_AA64PFR0_EL0_32BIT_64BIT;
}
+static inline bool id_aa64pfr0_sve(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+
+ return val > 0;
+}
+
void __init setup_cpu_features(void);
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
@@ -262,6 +271,39 @@ static inline bool system_uses_ttbr0_pan(void)
!cpus_have_const_cap(ARM64_HAS_PAN);
}
+static inline bool system_supports_sve(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SVE) &&
+ cpus_have_const_cap(ARM64_SVE);
+}
+
+/*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+static inline u64 read_zcr_features(void)
+{
+ u64 zcr;
+ unsigned int vq_max;
+
+ /*
+ * Set the maximum possible VL, and write zeroes to all other
+ * bits to see if they stick.
+ */
+ sve_kernel_enable(NULL);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+ zcr = read_sysreg_s(SYS_ZCR_EL1);
+ zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+ vq_max = sve_vq_from_vl(sve_get_vl());
+ zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+ return zcr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
new file mode 100644
index 000000000000..22e4c83de5a5
--- /dev/null
+++ b/arch/arm64/include/asm/daifflags.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_DAIFFLAGS_H
+#define __ASM_DAIFFLAGS_H
+
+#include <linux/irqflags.h>
+
+#define DAIF_PROCCTX 0
+#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
+
+/* mask/save/unmask/restore all exceptions, including interrupts. */
+static inline void local_daif_mask(void)
+{
+ asm volatile(
+ "msr daifset, #0xf // local_daif_mask\n"
+ :
+ :
+ : "memory");
+ trace_hardirqs_off();
+}
+
+static inline unsigned long local_daif_save(void)
+{
+ unsigned long flags;
+
+ asm volatile(
+ "mrs %0, daif // local_daif_save\n"
+ : "=r" (flags)
+ :
+ : "memory");
+ local_daif_mask();
+
+ return flags;
+}
+
+static inline void local_daif_unmask(void)
+{
+ trace_hardirqs_on();
+ asm volatile(
+ "msr daifclr, #0xf // local_daif_unmask"
+ :
+ :
+ : "memory");
+}
+
+static inline void local_daif_restore(unsigned long flags)
+{
+ if (!arch_irqs_disabled_flags(flags))
+ trace_hardirqs_on();
+ asm volatile(
+ "msr daif, %0 // local_daif_restore"
+ :
+ : "r" (flags)
+ : "memory");
+ if (arch_irqs_disabled_flags(flags))
+ trace_hardirqs_off();
+}
+
+#endif
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 33be513ef24c..fac1c4de7898 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -188,8 +188,8 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
#define compat_start_thread compat_start_thread
/*
- * Unlike the native SET_PERSONALITY macro, the compat version inherits
- * READ_IMPLIES_EXEC across a fork() since this is the behaviour on
+ * Unlike the native SET_PERSONALITY macro, the compat version maintains
+ * READ_IMPLIES_EXEC across an execve() since this is the behaviour on
* arch/arm/.
*/
#define COMPAT_SET_PERSONALITY(ex) \
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 66ed8b6b9976..014d7d8edcf9 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -43,7 +43,8 @@
#define ESR_ELx_EC_HVC64 (0x16)
#define ESR_ELx_EC_SMC64 (0x17)
#define ESR_ELx_EC_SYS64 (0x18)
-/* Unallocated EC: 0x19 - 0x1E */
+#define ESR_ELx_EC_SVE (0x19)
+/* Unallocated EC: 0x1A - 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f)
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 410c48163c6a..74f34392a531 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -17,9 +17,13 @@
#define __ASM_FP_H
#include <asm/ptrace.h>
+#include <asm/errno.h>
#ifndef __ASSEMBLY__
+#include <linux/cache.h>
+#include <linux/stddef.h>
+
/*
* FP/SIMD storage area has:
* - FPSR and FPCR
@@ -35,13 +39,16 @@ struct fpsimd_state {
__uint128_t vregs[32];
u32 fpsr;
u32 fpcr;
+ /*
+ * For ptrace compatibility, pad to next 128-bit
+ * boundary here if extending this struct.
+ */
};
};
/* the id of the last cpu to have restored this state */
unsigned int cpu;
};
-
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
@@ -61,11 +68,73 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void sve_flush_cpu_state(void);
+
+/* Maximum VL that SVE VL-agnostic software can transparently support */
+#define SVE_VL_ARCH_MAX 0x100
+
+extern void sve_save_state(void *state, u32 *pfpsr);
+extern void sve_load_state(void const *state, u32 const *pfpsr,
+ unsigned long vq_minus_1);
+extern unsigned int sve_get_vl(void);
+extern int sve_kernel_enable(void *);
+
+extern int __ro_after_init sve_max_vl;
+
+#ifdef CONFIG_ARM64_SVE
+
+extern size_t sve_state_size(struct task_struct const *task);
+
+extern void sve_alloc(struct task_struct *task);
+extern void fpsimd_release_task(struct task_struct *task);
+extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void sve_sync_to_fpsimd(struct task_struct *task);
+extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+
+extern int sve_set_vector_length(struct task_struct *task,
+ unsigned long vl, unsigned long flags);
+
+extern int sve_set_current_vl(unsigned long arg);
+extern int sve_get_current_vl(void);
+
+/*
+ * Probing and setup functions.
+ * Calls to these functions must be serialised with one another.
+ */
+extern void __init sve_init_vq_map(void);
+extern void sve_update_vq_map(void);
+extern int sve_verify_vq_map(void);
+extern void __init sve_setup(void);
+
+#else /* ! CONFIG_ARM64_SVE */
+
+static inline void sve_alloc(struct task_struct *task) { }
+static inline void fpsimd_release_task(struct task_struct *task) { }
+static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
+static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+
+static inline int sve_set_current_vl(unsigned long arg)
+{
+ return -EINVAL;
+}
+
+static inline int sve_get_current_vl(void)
+{
+ return -EINVAL;
+}
+
+static inline void sve_init_vq_map(void) { }
+static inline void sve_update_vq_map(void) { }
+static inline int sve_verify_vq_map(void) { return 0; }
+static inline void sve_setup(void) { }
+
+#endif /* ! CONFIG_ARM64_SVE */
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 0f5fdd388b0d..e050d765ca9e 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,3 +75,151 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
+
+/* Sanity-check macros to help avoid encoding garbage instructions */
+
+.macro _check_general_reg nr
+ .if (\nr) < 0 || (\nr) > 30
+ .error "Bad register number \nr."
+ .endif
+.endm
+
+.macro _sve_check_zreg znr
+ .if (\znr) < 0 || (\znr) > 31
+ .error "Bad Scalable Vector Extension vector register number \znr."
+ .endif
+.endm
+
+.macro _sve_check_preg pnr
+ .if (\pnr) < 0 || (\pnr) > 15
+ .error "Bad Scalable Vector Extension predicate register number \pnr."
+ .endif
+.endm
+
+.macro _check_num n, min, max
+ .if (\n) < (\min) || (\n) > (\max)
+ .error "Number \n out of range [\min,\max]"
+ .endif
+.endm
+
+/* SVE instruction encodings for non-SVE-capable assemblers */
+
+/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* RDVL X\nx, #\imm */
+.macro _sve_rdvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5000 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/* RDFFR (unpredicated): RDFFR P\np.B */
+.macro _sve_rdffr np
+ _sve_check_preg \np
+ .inst 0x2519f000 \
+ | (\np)
+.endm
+
+/* WRFFR P\np.B */
+.macro _sve_wrffr np
+ _sve_check_preg \np
+ .inst 0x25289000 \
+ | ((\np) << 5)
+.endm
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body \from
+ .else
+ __for \from, (\from) + ((\to) - (\from)) / 2
+ __for (\from) + ((\to) - (\from)) / 2 + 1, \to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ \insn
+ .endm
+
+ __for \from, \to
+
+ .purgem _for__body
+.endm
+
+.macro sve_save nxbase, xpfpsr, nxtmp
+ _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
+ _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
+ _sve_rdffr 0
+ _sve_str_p 0, \nxbase
+ _sve_ldr_p 0, \nxbase, -16
+
+ mrs x\nxtmp, fpsr
+ str w\nxtmp, [\xpfpsr]
+ mrs x\nxtmp, fpcr
+ str w\nxtmp, [\xpfpsr, #4]
+.endm
+
+.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp
+ mrs_s x\nxtmp, SYS_ZCR_EL1
+ bic x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK
+ orr x\nxtmp, x\nxtmp, \xvqminus1
+ msr_s SYS_ZCR_EL1, x\nxtmp // self-synchronising
+
+ _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
+ _sve_ldr_p 0, \nxbase
+ _sve_wrffr 0
+ _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
+
+ ldr w\nxtmp, [\xpfpsr]
+ msr fpsr, x\nxtmp
+ ldr w\nxtmp, [\xpfpsr, #4]
+ msr fpcr, x\nxtmp
+.endm
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 8c581281fa12..24692edf1a69 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,6 +21,19 @@
#include <asm/ptrace.h>
/*
+ * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * order:
+ * Masking debug exceptions causes all other exceptions to be masked too/
+ * Masking SError masks irq, but not debug exceptions. Masking irqs has no
+ * side effects for other flags. Keeping to this order makes it easier for
+ * entry.S to know which exceptions should be unmasked.
+ *
+ * FIQ is never expected, but we mask it when we disable debug exceptions, and
+ * unmask it at all other times.
+ */
+
+/*
* CPU interrupt mask handling.
*/
static inline unsigned long arch_local_irq_save(void)
@@ -53,12 +66,6 @@ static inline void arch_local_irq_disable(void)
: "memory");
}
-#define local_fiq_enable() asm("msr daifclr, #1" : : : "memory")
-#define local_fiq_disable() asm("msr daifset, #1" : : : "memory")
-
-#define local_async_enable() asm("msr daifclr, #4" : : : "memory")
-#define local_async_disable() asm("msr daifset, #4" : : : "memory")
-
/*
* Save the current interrupt enable state.
*/
@@ -89,26 +96,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
{
return flags & PSR_I_BIT;
}
-
-/*
- * save and restore debug state
- */
-#define local_dbg_save(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "mrs %0, daif // local_dbg_save\n" \
- "msr daifset, #8" \
- : "=r" (flags) : : "memory"); \
- } while (0)
-
-#define local_dbg_restore(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "msr daif, %0 // local_dbg_restore\n" \
- : : "r" (flags) : "memory"); \
- } while (0)
-
#endif
#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 61d694c2eae5..7f069ff37f06 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -185,7 +185,9 @@
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
-#define CPTR_EL2_DEFAULT 0x000033ff
+#define CPTR_EL2_TZ (1 << 8)
+#define CPTR_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 */
+#define CPTR_EL2_DEFAULT CPTR_EL2_RES1
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_TPMS (1 << 14)
@@ -236,5 +238,6 @@
#define CPACR_EL1_FPEN (3 << 20)
#define CPACR_EL1_TTA (1 << 28)
+#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e923b58606e2..674912d7a571 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
@@ -384,4 +385,14 @@ static inline void __cpu_init_stage2(void)
"PARange is %d bits, unsupported configuration!", parange);
}
+/*
+ * All host FP/SIMD state is restored on guest exit, so nothing needs
+ * doing here except in the SVE case:
+*/
+static inline void kvm_fpsimd_flush_cpu_state(void)
+{
+ if (system_supports_sve())
+ sve_flush_cpu_state();
+}
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f7c4d2146aed..d4bae7d6e0d8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -61,8 +61,6 @@
* KIMAGE_VADDR - the virtual address of the start of the kernel image
* VA_BITS - the maximum number of bits for virtual addresses.
* VA_START - the first kernel virtual address.
- * TASK_SIZE - the maximum size of a user space task.
- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
*/
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) - \
@@ -77,19 +75,6 @@
#define PCI_IO_END (VMEMMAP_START - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
-#define TASK_SIZE_64 (UL(1) << VA_BITS)
-
-#ifdef CONFIG_COMPAT
-#define TASK_SIZE_32 UL(0x100000000)
-#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
- TASK_SIZE_32 : TASK_SIZE_64)
-#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
- TASK_SIZE_32 : TASK_SIZE_64)
-#else
-#define TASK_SIZE TASK_SIZE_64
-#endif /* CONFIG_COMPAT */
-
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
#define KERNEL_START _text
#define KERNEL_END _end
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b46e54c2399b..c9530b5b5ca8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -98,6 +98,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
#define pte_valid_young(pte) \
((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+#define pte_valid_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
/*
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
@@ -107,6 +109,18 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_accessible(mm, pte) \
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+/*
+ * p??_access_permitted() is true for valid user mappings (subject to the
+ * write permission check) other than user execute-only which do not have the
+ * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set.
+ */
+#define pte_access_permitted(pte, write) \
+ (pte_valid_user(pte) && (!(write) || pte_write(pte)))
+#define pmd_access_permitted(pmd, write) \
+ (pte_access_permitted(pmd_pte(pmd), (write)))
+#define pud_access_permitted(pud, write) \
+ (pte_access_permitted(pud_pte(pud), (write)))
+
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
pte_val(pte) &= ~pgprot_val(prot);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 29adab8138c3..023cacb946c3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,6 +19,10 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
+#define TASK_SIZE_64 (UL(1) << VA_BITS)
+
+#ifndef __ASSEMBLY__
+
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -37,6 +41,22 @@
#include <asm/ptrace.h>
#include <asm/types.h>
+/*
+ * TASK_SIZE - the maximum size of a user space task.
+ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
+ */
+#ifdef CONFIG_COMPAT
+#define TASK_SIZE_32 UL(0x100000000)
+#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
+#else
+#define TASK_SIZE TASK_SIZE_64
+#endif /* CONFIG_COMPAT */
+
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+
#define STACK_TOP_MAX TASK_SIZE_64
#ifdef CONFIG_COMPAT
#define AARCH32_VECTORS_BASE 0xffff0000
@@ -85,6 +105,9 @@ struct thread_struct {
unsigned long tp2_value;
#endif
struct fpsimd_state fpsimd_state;
+ void *sve_state; /* SVE registers, if any */
+ unsigned int sve_vl; /* SVE vector length */
+ unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
@@ -194,4 +217,9 @@ static inline void spin_lock_prefetch(const void *ptr)
int cpu_enable_pan(void *__unused);
int cpu_enable_cache_maint_trap(void *__unused);
+/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+#define SVE_SET_VL(arg) sve_set_current_vl(arg)
+#define SVE_GET_VL() sve_get_current_vl()
+
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f707fed5886f..08cc88574659 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -145,10 +145,14 @@
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
+#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
+#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
+#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
+
#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
@@ -160,6 +164,8 @@
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2)
+#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
+
#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0)
#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1)
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
@@ -172,6 +178,99 @@
#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
+/*** Statistical Profiling Extension ***/
+/* ID registers */
+#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7)
+#define SYS_PMSIDR_EL1_FE_SHIFT 0
+#define SYS_PMSIDR_EL1_FT_SHIFT 1
+#define SYS_PMSIDR_EL1_FL_SHIFT 2
+#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3
+#define SYS_PMSIDR_EL1_LDS_SHIFT 4
+#define SYS_PMSIDR_EL1_ERND_SHIFT 5
+#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8
+#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL
+#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12
+#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL
+#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16
+#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL
+
+#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
+#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0
+#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU
+#define SYS_PMBIDR_EL1_P_SHIFT 4
+#define SYS_PMBIDR_EL1_F_SHIFT 5
+
+/* Sampling controls */
+#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
+#define SYS_PMSCR_EL1_E0SPE_SHIFT 0
+#define SYS_PMSCR_EL1_E1SPE_SHIFT 1
+#define SYS_PMSCR_EL1_CX_SHIFT 3
+#define SYS_PMSCR_EL1_PA_SHIFT 4
+#define SYS_PMSCR_EL1_TS_SHIFT 5
+#define SYS_PMSCR_EL1_PCT_SHIFT 6
+
+#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0)
+#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0
+#define SYS_PMSCR_EL2_E2SPE_SHIFT 1
+#define SYS_PMSCR_EL2_CX_SHIFT 3
+#define SYS_PMSCR_EL2_PA_SHIFT 4
+#define SYS_PMSCR_EL2_TS_SHIFT 5
+#define SYS_PMSCR_EL2_PCT_SHIFT 6
+
+#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2)
+
+#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3)
+#define SYS_PMSIRR_EL1_RND_SHIFT 0
+#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8
+#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
+
+/* Filtering controls */
+#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
+#define SYS_PMSFCR_EL1_FE_SHIFT 0
+#define SYS_PMSFCR_EL1_FT_SHIFT 1
+#define SYS_PMSFCR_EL1_FL_SHIFT 2
+#define SYS_PMSFCR_EL1_B_SHIFT 16
+#define SYS_PMSFCR_EL1_LD_SHIFT 17
+#define SYS_PMSFCR_EL1_ST_SHIFT 18
+
+#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
+#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
+
+#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
+#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
+
+/* Buffer controls */
+#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
+#define SYS_PMBLIMITR_EL1_E_SHIFT 0
+#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
+#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
+#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
+
+#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
+
+/* Buffer error reporting */
+#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3)
+#define SYS_PMBSR_EL1_COLL_SHIFT 16
+#define SYS_PMBSR_EL1_S_SHIFT 17
+#define SYS_PMBSR_EL1_EA_SHIFT 18
+#define SYS_PMBSR_EL1_DL_SHIFT 19
+#define SYS_PMBSR_EL1_EC_SHIFT 26
+#define SYS_PMBSR_EL1_EC_MASK 0x3fUL
+
+#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
+
+#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0
+#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL
+
+#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0
+#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL
+
+#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+
+/*** End of Statistical Profiling Extension ***/
+
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
@@ -250,6 +349,8 @@
#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7)
+#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
+
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
@@ -318,6 +419,10 @@
#define SCTLR_EL1_CP15BEN (1 << 5)
/* id_aa64isar0 */
+#define ID_AA64ISAR0_DP_SHIFT 44
+#define ID_AA64ISAR0_SM4_SHIFT 40
+#define ID_AA64ISAR0_SM3_SHIFT 36
+#define ID_AA64ISAR0_SHA3_SHIFT 32
#define ID_AA64ISAR0_RDM_SHIFT 28
#define ID_AA64ISAR0_ATOMICS_SHIFT 20
#define ID_AA64ISAR0_CRC32_SHIFT 16
@@ -332,6 +437,7 @@
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
+#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
@@ -340,6 +446,7 @@
#define ID_AA64PFR0_EL1_SHIFT 4
#define ID_AA64PFR0_EL0_SHIFT 0
+#define ID_AA64PFR0_SVE 0x1
#define ID_AA64PFR0_FP_NI 0xf
#define ID_AA64PFR0_FP_SUPPORTED 0x0
#define ID_AA64PFR0_ASIMD_NI 0xf
@@ -441,6 +548,20 @@
#endif
+/*
+ * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
+ * are reserved by the SVE architecture for future expansion of the LEN
+ * field, with compatible semantics.
+ */
+#define ZCR_ELx_LEN_SHIFT 0
+#define ZCR_ELx_LEN_SIZE 9
+#define ZCR_ELx_LEN_MASK 0x1ff
+
+#define CPACR_EL1_ZEN_EL1EN (1 << 16) /* enable EL1 access */
+#define CPACR_EL1_ZEN_EL0EN (1 << 17) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+
+
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (1UL << 31)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index ddded6497a8a..eb431286bacd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -63,6 +63,8 @@ struct thread_info {
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
+void arch_release_task_struct(struct task_struct *tsk);
+
#endif
/*
@@ -92,6 +94,8 @@ void arch_setup_new_exec(void);
#define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
+#define TIF_SVE 23 /* Scalable Vector Extension in use */
+#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
@@ -105,6 +109,7 @@ void arch_setup_new_exec(void);
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index d131501c6222..1696f9de9359 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -34,9 +34,17 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
+void force_signal_inject(int signal, int code, struct pt_regs *regs,
+ unsigned long address);
void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
+/*
+ * Move regs->pc to next instruction and do necessary setup before it
+ * is executed.
+ */
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size);
+
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index b3fdeee739ea..cda76fa8b9b2 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -37,5 +37,11 @@
#define HWCAP_FCMA (1 << 14)
#define HWCAP_LRCPC (1 << 15)
#define HWCAP_DCPOP (1 << 16)
+#define HWCAP_SHA3 (1 << 17)
+#define HWCAP_SM3 (1 << 18)
+#define HWCAP_SM4 (1 << 19)
+#define HWCAP_ASIMDDP (1 << 20)
+#define HWCAP_SHA512 (1 << 21)
+#define HWCAP_SVE (1 << 22)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 67d4c33974e8..98c4ce55d9c3 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
/*
@@ -47,7 +48,6 @@
#define PSR_D_BIT 0x00000200
#define PSR_PAN_BIT 0x00400000
#define PSR_UAO_BIT 0x00800000
-#define PSR_Q_BIT 0x08000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
#define PSR_Z_BIT 0x40000000
@@ -64,6 +64,8 @@
#ifndef __ASSEMBLY__
+#include <linux/prctl.h>
+
/*
* User structures for general purpose, floating point and debug registers.
*/
@@ -91,6 +93,141 @@ struct user_hwdebug_state {
} dbg_regs[16];
};
+/* SVE/FP/SIMD state (NT_ARM_SVE) */
+
+struct user_sve_header {
+ __u32 size; /* total meaningful regset content in bytes */
+ __u32 max_size; /* maxmium possible size for this thread */
+ __u16 vl; /* current vector length */
+ __u16 max_vl; /* maximum possible vector length */
+ __u16 flags;
+ __u16 __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK (1 << 0)
+
+#define SVE_PT_REGS_FPSIMD 0
+#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
+
+/*
+ * Common SVE_PT_* flags:
+ * These must be kept in sync with prctl interface in <linux/ptrace.h>
+ */
+#define SVE_PT_VL_INHERIT (PR_SVE_VL_INHERIT >> 16)
+#define SVE_PT_VL_ONEXEC (PR_SVE_SET_VL_ONEXEC >> 16)
+
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header. The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header: SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET \
+ ((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
+ / SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state. Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags) (sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ * x type description
+ * - ---- -----------
+ * ZREGS \
+ * ZREG |
+ * PREGS | refer to <asm/sigcontext.h>
+ * PREG |
+ * FFR /
+ *
+ * FPSR uint32_t FPSR
+ * FPCR uint32_t FPCR
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq)
+#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
+#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq)
+#define SVE_PT_SVE_FPSR_SIZE sizeof(__u32)
+#define SVE_PT_SVE_FPCR_SIZE sizeof(__u32)
+
+#define __SVE_SIG_TO_PT(offset) \
+ ((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
+
+#define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET
+
+#define SVE_PT_SVE_ZREGS_OFFSET \
+ __SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
+#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
+ __SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
+#define SVE_PT_SVE_ZREGS_SIZE(vq) \
+ (SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
+
+#define SVE_PT_SVE_PREGS_OFFSET(vq) \
+ __SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
+ __SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
+#define SVE_PT_SVE_PREGS_SIZE(vq) \
+ (SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
+ SVE_PT_SVE_PREGS_OFFSET(vq))
+
+#define SVE_PT_SVE_FFR_OFFSET(vq) \
+ __SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
+
+#define SVE_PT_SVE_FPSR_OFFSET(vq) \
+ ((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \
+ (SVE_VQ_BYTES - 1)) \
+ / SVE_VQ_BYTES * SVE_VQ_BYTES)
+#define SVE_PT_SVE_FPCR_OFFSET(vq) \
+ (SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
+
+/*
+ * Any future extension appended after FPCR must be aligned to the next
+ * 128-bit boundary.
+ */
+
+#define SVE_PT_SVE_SIZE(vq, flags) \
+ ((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \
+ - SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \
+ / SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_PT_SIZE(vq, flags) \
+ (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \
+ SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
+ : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index f6cc3061b1ae..dca8f8b5168b 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -17,6 +17,8 @@
#ifndef _UAPI__ASM_SIGCONTEXT_H
#define _UAPI__ASM_SIGCONTEXT_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
/*
@@ -42,10 +44,11 @@ struct sigcontext {
*
* 0x210 fpsimd_context
* 0x10 esr_context
+ * 0x8a0 sve_context (vl <= 64) (optional)
* 0x20 extra_context (optional)
* 0x10 terminator (null _aarch64_ctx)
*
- * 0xdb0 (reserved for future allocation)
+ * 0x510 (reserved for future allocation)
*
* New records that can exceed this space need to be opt-in for userspace, so
* that an expanded signal frame is not generated unexpectedly. The mechanism
@@ -117,4 +120,119 @@ struct extra_context {
__u32 __reserved[3];
};
+#define SVE_MAGIC 0x53564501
+
+struct sve_context {
+ struct _aarch64_ctx head;
+ __u16 vl;
+ __u16 __reserved[3];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The SVE architecture leaves space for future expansion of the
+ * vector length beyond its initial architectural limit of 2048 bits
+ * (16 quadwords).
+ *
+ * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
+ * terminology.
+ */
+#define SVE_VQ_BYTES 16 /* number of bytes per quadword */
+
+#define SVE_VQ_MIN 1
+#define SVE_VQ_MAX 512
+
+#define SVE_VL_MIN (SVE_VQ_MIN * SVE_VQ_BYTES)
+#define SVE_VL_MAX (SVE_VQ_MAX * SVE_VQ_BYTES)
+
+#define SVE_NUM_ZREGS 32
+#define SVE_NUM_PREGS 16
+
+#define sve_vl_valid(vl) \
+ ((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
+#define sve_vq_from_vl(vl) ((vl) / SVE_VQ_BYTES)
+#define sve_vl_from_vq(vq) ((vq) * SVE_VQ_BYTES)
+
+/*
+ * If the SVE registers are currently live for the thread at signal delivery,
+ * sve_context.head.size >=
+ * SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl))
+ * and the register data may be accessed using the SVE_SIG_*() macros.
+ *
+ * If sve_context.head.size <
+ * SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)),
+ * the SVE registers were not live for the thread and no register data
+ * is included: in this case, the SVE_SIG_*() macros should not be
+ * used except for this check.
+ *
+ * The same convention applies when returning from a signal: a caller
+ * will need to remove or resize the sve_context block if it wants to
+ * make the SVE registers live when they were previously non-live or
+ * vice-versa. This may require the the caller to allocate fresh
+ * memory and/or move other context blocks in the signal frame.
+ *
+ * Changing the vector length during signal return is not permitted:
+ * sve_context.vl must equal the thread's current vector length when
+ * doing a sigreturn.
+ *
+ *
+ * Note: for all these macros, the "vq" argument denotes the SVE
+ * vector length in quadwords (i.e., units of 128 bits).
+ *
+ * The correct way to obtain vq is to use sve_vq_from_vl(vl). The
+ * result is valid if and only if sve_vl_valid(vl) is true. This is
+ * guaranteed for a struct sve_context written by the kernel.
+ *
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_SIG_x_OFFSET(args) is the start offset relative to
+ * the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the
+ * size in bytes:
+ *
+ * x type description
+ * - ---- -----------
+ * REGS the entire SVE context
+ *
+ * ZREGS __uint128_t[SVE_NUM_ZREGS][vq] all Z-registers
+ * ZREG __uint128_t[vq] individual Z-register Zn
+ *
+ * PREGS uint16_t[SVE_NUM_PREGS][vq] all P-registers
+ * PREG uint16_t[vq] individual P-register Pn
+ *
+ * FFR uint16_t[vq] first-fault status register
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_SIG_ZREG_SIZE(vq) ((__u32)(vq) * SVE_VQ_BYTES)
+#define SVE_SIG_PREG_SIZE(vq) ((__u32)(vq) * (SVE_VQ_BYTES / 8))
+#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
+
+#define SVE_SIG_REGS_OFFSET \
+ ((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
+ / SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET
+#define SVE_SIG_ZREG_OFFSET(vq, n) \
+ (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
+#define SVE_SIG_ZREGS_SIZE(vq) \
+ (SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
+
+#define SVE_SIG_PREGS_OFFSET(vq) \
+ (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
+#define SVE_SIG_PREG_OFFSET(vq, n) \
+ (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
+#define SVE_SIG_PREGS_SIZE(vq) \
+ (SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
+
+#define SVE_SIG_FFR_OFFSET(vq) \
+ (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
+
+#define SVE_SIG_REGS_SIZE(vq) \
+ (SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
+
+#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
+
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 0029e13adb59..8265dd790895 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -11,8 +11,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
-CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
# Object file lists.
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index d06fbe4cd38d..c33b5e4010ab 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -228,15 +228,7 @@ ret:
return ret;
}
-static struct ctl_table ctl_abi[] = {
- {
- .procname = "abi",
- .mode = 0555,
- },
- { }
-};
-
-static void __init register_insn_emulation_sysctl(struct ctl_table *table)
+static void __init register_insn_emulation_sysctl(void)
{
unsigned long flags;
int i = 0;
@@ -262,8 +254,7 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
}
raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
- table->child = insns_sysctl;
- register_sysctl_table(table);
+ register_sysctl("abi", insns_sysctl);
}
/*
@@ -431,7 +422,7 @@ ret:
pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n",
current->comm, (unsigned long)current->pid, regs->pc);
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, 4);
return 0;
fault:
@@ -512,7 +503,7 @@ ret:
pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n",
current->comm, (unsigned long)current->pid, regs->pc);
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, 4);
return 0;
}
@@ -586,14 +577,14 @@ static int compat_setend_handler(struct pt_regs *regs, u32 big_endian)
static int a32_setend_handler(struct pt_regs *regs, u32 instr)
{
int rc = compat_setend_handler(regs, (instr >> 9) & 1);
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, 4);
return rc;
}
static int t16_setend_handler(struct pt_regs *regs, u32 instr)
{
int rc = compat_setend_handler(regs, (instr >> 3) & 1);
- regs->pc += 2;
+ arm64_skip_faulting_instruction(regs, 2);
return rc;
}
@@ -644,7 +635,7 @@ static int __init armv8_deprecated_init(void)
cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
"arm64/isndep:starting",
run_all_insn_set_hw_mode, NULL);
- register_insn_emulation_sysctl(ctl_abi);
+ register_insn_emulation_sysctl();
return 0;
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 21e2c95d24e7..c5ba0097887f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -27,6 +27,7 @@
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
+#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
@@ -51,6 +52,21 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcaps);
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+ sys_caps_initialised = true;
+}
+
static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
{
/* file-wide pr_fmt adds "CPU features: " prefix */
@@ -107,7 +123,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
* sync with the documentation of the CPU feature register ABI.
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
@@ -117,34 +137,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
/* Linux doesn't care about the EL3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
/*
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
@@ -155,20 +176,20 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -193,14 +214,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */
ARM64_FTR_END,
};
@@ -221,8 +242,8 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
};
static const struct arm64_ftr_bits ftr_mvfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */
ARM64_FTR_END,
};
@@ -234,25 +255,25 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
static const struct arm64_ftr_bits ftr_id_isar5[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */
ARM64_FTR_END,
};
@@ -268,6 +289,12 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_zcr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
+ ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@@ -334,6 +361,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
+ ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -348,6 +376,9 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ /* Op1 = 0, CRn = 1, CRm = 2 */
+ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+
/* Op1 = 3, CRn = 0, CRm = 0 */
{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
@@ -485,6 +516,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -505,6 +537,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
}
+ if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
+ sve_init_vq_map();
+ }
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -608,6 +644,9 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
+ info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
+
/*
* If we have AArch32, we care about 32-bit features for compat.
* If the system doesn't support AArch32, don't update them.
@@ -655,6 +694,16 @@ void update_cpu_features(int cpu,
info->reg_mvfr2, boot->reg_mvfr2);
}
+ if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+ taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
+ info->reg_zcr, boot->reg_zcr);
+
+ /* Probe vector lengths, unless we already gave up on SVE */
+ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
+ !sys_caps_initialised)
+ sve_update_vq_map();
+ }
+
/*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
@@ -900,6 +949,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 1,
},
#endif
+#ifdef CONFIG_ARM64_SVE
+ {
+ .desc = "Scalable Vector Extension",
+ .capability = ARM64_SVE,
+ .def_scope = SCOPE_SYSTEM,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_SVE_SHIFT,
+ .min_field_value = ID_AA64PFR0_SVE,
+ .matches = has_cpuid_feature,
+ .enable = sve_kernel_enable,
+ },
+#endif /* CONFIG_ARM64_SVE */
{},
};
@@ -921,9 +983,14 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -932,6 +999,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
+#ifdef CONFIG_ARM64_SVE
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
+#endif
{},
};
@@ -1041,21 +1111,6 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
}
/*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier.
- */
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
-{
- sys_caps_initialised = true;
-}
-
-/*
* Check for CPU features that are used in early boot
* based on the Boot CPU value.
*/
@@ -1097,6 +1152,23 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
}
}
+static void verify_sve_features(void)
+{
+ u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+ u64 zcr = read_zcr_features();
+
+ unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
+ unsigned int len = zcr & ZCR_ELx_LEN_MASK;
+
+ if (len < safe_len || sve_verify_vq_map()) {
+ pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+ smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Add checks on other ZCR bits here if necessary */
+}
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@@ -1110,8 +1182,12 @@ static void verify_local_cpu_capabilities(void)
verify_local_cpu_errata_workarounds();
verify_local_cpu_features(arm64_features);
verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
if (system_supports_32bit_el0())
verify_local_elf_hwcaps(compat_elf_hwcaps);
+
+ if (system_supports_sve())
+ verify_sve_features();
}
void check_local_cpu_capabilities(void)
@@ -1189,6 +1265,8 @@ void __init setup_cpu_features(void)
if (system_supports_32bit_el0())
setup_elf_hwcaps(compat_elf_hwcaps);
+ sve_setup();
+
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
@@ -1287,7 +1365,7 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn)
if (!rc) {
dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
pt_regs_write_reg(regs, dst, val);
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
return rc;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 311885962830..1e2554543506 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -19,6 +19,7 @@
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -69,6 +70,12 @@ static const char *const hwcap_str[] = {
"fcma",
"lrcpc",
"dcpop",
+ "sha3",
+ "sm3",
+ "sm4",
+ "asimddp",
+ "sha512",
+ "sve",
NULL
};
@@ -326,6 +333,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+ info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
/* Update the 32bit ID registers only if AArch32 is implemented */
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
@@ -348,6 +356,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
}
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(info->reg_id_aa64pfr0))
+ info->reg_zcr = read_zcr_features();
+
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c7ef99904934..a88b6ccebbb4 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -30,6 +30,7 @@
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/system_misc.h>
@@ -46,9 +47,9 @@ u8 debug_monitors_arch(void)
static void mdscr_write(u32 mdscr)
{
unsigned long flags;
- local_dbg_save(flags);
+ flags = local_daif_save();
write_sysreg(mdscr, mdscr_el1);
- local_dbg_restore(flags);
+ local_daif_restore(flags);
}
NOKPROBE_SYMBOL(mdscr_write);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..73f17bffcd23 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,20 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_ARM64_SVE
+ENTRY(sve_save_state)
+ sve_save 0, x1, 2
+ ret
+ENDPROC(sve_save_state)
+
+ENTRY(sve_load_state)
+ sve_load 0, x1, x2, 3
+ ret
+ENDPROC(sve_load_state)
+
+ENTRY(sve_get_vl)
+ _sve_rdvl 0, 1
+ ret
+ENDPROC(sve_get_vl)
+#endif /* CONFIG_ARM64_SVE */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index e1be42e11ff5..1175f5827ae1 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -108,13 +108,8 @@ ENTRY(_mcount)
mcount_get_lr x1 // function's lr (= parent's pc)
blr x2 // (*ftrace_trace_function)(pc, lr);
-#ifndef CONFIG_FUNCTION_GRAPH_TRACER
-skip_ftrace_call: // return;
- mcount_exit // }
-#else
- mcount_exit // return;
- // }
-skip_ftrace_call:
+skip_ftrace_call: // }
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ldr_l x2, ftrace_graph_return
cmp x0, x2 // if ((ftrace_graph_return
b.ne ftrace_graph_caller // != ftrace_stub)
@@ -123,9 +118,8 @@ skip_ftrace_call:
adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub))
cmp x0, x2
b.ne ftrace_graph_caller // ftrace_graph_caller();
-
- mcount_exit
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ mcount_exit
ENDPROC(_mcount)
#else /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e1c59d4008a8..6d14b8f29b5f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,7 +28,7 @@
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/irq.h>
-#include <asm/memory.h>
+#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
@@ -221,6 +221,8 @@ alternative_else_nop_endif
.macro kernel_exit, el
.if \el != 0
+ disable_daif
+
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
@@ -373,18 +375,18 @@ ENTRY(vectors)
kernel_ventry el1_sync // Synchronous EL1h
kernel_ventry el1_irq // IRQ EL1h
kernel_ventry el1_fiq_invalid // FIQ EL1h
- kernel_ventry el1_error_invalid // Error EL1h
+ kernel_ventry el1_error // Error EL1h
kernel_ventry el0_sync // Synchronous 64-bit EL0
kernel_ventry el0_irq // IRQ 64-bit EL0
kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0
- kernel_ventry el0_error_invalid // Error 64-bit EL0
+ kernel_ventry el0_error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
kernel_ventry el0_sync_compat // Synchronous 32-bit EL0
kernel_ventry el0_irq_compat // IRQ 32-bit EL0
kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
- kernel_ventry el0_error_invalid_compat // Error 32-bit EL0
+ kernel_ventry el0_error_compat // Error 32-bit EL0
#else
kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0
kernel_ventry el0_irq_invalid // IRQ 32-bit EL0
@@ -453,10 +455,6 @@ ENDPROC(el0_error_invalid)
el0_fiq_invalid_compat:
inv_entry 0, BAD_FIQ, 32
ENDPROC(el0_fiq_invalid_compat)
-
-el0_error_invalid_compat:
- inv_entry 0, BAD_ERROR, 32
-ENDPROC(el0_error_invalid_compat)
#endif
el1_sync_invalid:
@@ -508,24 +506,18 @@ el1_da:
* Data abort handling
*/
mrs x3, far_el1
- enable_dbg
- // re-enable interrupts if they were enabled in the aborted context
- tbnz x23, #7, 1f // PSR_I_BIT
- enable_irq
-1:
+ inherit_daif pstate=x23, tmp=x2
clear_address_tag x0, x3
mov x2, sp // struct pt_regs
bl do_mem_abort
- // disable interrupts before pulling preserved data off the stack
- disable_irq
kernel_exit 1
el1_sp_pc:
/*
* Stack or PC alignment exception handling
*/
mrs x0, far_el1
- enable_dbg
+ inherit_daif pstate=x23, tmp=x2
mov x2, sp
bl do_sp_pc_abort
ASM_BUG()
@@ -533,7 +525,7 @@ el1_undef:
/*
* Undefined instruction
*/
- enable_dbg
+ inherit_daif pstate=x23, tmp=x2
mov x0, sp
bl do_undefinstr
ASM_BUG()
@@ -550,7 +542,7 @@ el1_dbg:
kernel_exit 1
el1_inv:
// TODO: add support for undefined instructions in kernel mode
- enable_dbg
+ inherit_daif pstate=x23, tmp=x2
mov x0, sp
mov x2, x1
mov x1, #BAD_SYNC
@@ -561,7 +553,7 @@ ENDPROC(el1_sync)
.align 6
el1_irq:
kernel_entry 1
- enable_dbg
+ enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
@@ -607,6 +599,8 @@ el0_sync:
b.eq el0_ia
cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
b.eq el0_fpsimd_acc
+ cmp x24, #ESR_ELx_EC_SVE // SVE access
+ b.eq el0_sve_acc
cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
@@ -658,6 +652,7 @@ el0_svc_compat:
/*
* AArch32 syscall handling
*/
+ ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags
adrp stbl, compat_sys_call_table // load compat syscall table pointer
mov wscno, w7 // syscall number in w7 (r7)
mov wsc_nr, #__NR_compat_syscalls
@@ -667,6 +662,10 @@ el0_svc_compat:
el0_irq_compat:
kernel_entry 0, 32
b el0_irq_naked
+
+el0_error_compat:
+ kernel_entry 0, 32
+ b el0_error_naked
#endif
el0_da:
@@ -674,8 +673,7 @@ el0_da:
* Data abort handling
*/
mrs x26, far_el1
- // enable interrupts before calling the main handler
- enable_dbg_and_irq
+ enable_daif
ct_user_exit
clear_address_tag x0, x26
mov x1, x25
@@ -687,8 +685,7 @@ el0_ia:
* Instruction abort handling
*/
mrs x26, far_el1
- // enable interrupts before calling the main handler
- enable_dbg_and_irq
+ enable_daif
ct_user_exit
mov x0, x26
mov x1, x25
@@ -699,17 +696,27 @@ el0_fpsimd_acc:
/*
* Floating Point or Advanced SIMD access
*/
- enable_dbg
+ enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
bl do_fpsimd_acc
b ret_to_user
+el0_sve_acc:
+ /*
+ * Scalable Vector Extension access
+ */
+ enable_daif
+ ct_user_exit
+ mov x0, x25
+ mov x1, sp
+ bl do_sve_acc
+ b ret_to_user
el0_fpsimd_exc:
/*
- * Floating Point or Advanced SIMD exception
+ * Floating Point, Advanced SIMD or SVE exception
*/
- enable_dbg
+ enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
@@ -720,8 +727,7 @@ el0_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
- // enable interrupts before calling the main handler
- enable_dbg_and_irq
+ enable_daif
ct_user_exit
mov x0, x26
mov x1, x25
@@ -732,8 +738,7 @@ el0_undef:
/*
* Undefined instruction
*/
- // enable interrupts before calling the main handler
- enable_dbg_and_irq
+ enable_daif
ct_user_exit
mov x0, sp
bl do_undefinstr
@@ -742,7 +747,7 @@ el0_sys:
/*
* System instructions, for trapped cache maintenance instructions
*/
- enable_dbg_and_irq
+ enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
@@ -757,11 +762,11 @@ el0_dbg:
mov x1, x25
mov x2, sp
bl do_debug_exception
- enable_dbg
+ enable_daif
ct_user_exit
b ret_to_user
el0_inv:
- enable_dbg
+ enable_daif
ct_user_exit
mov x0, sp
mov x1, #BAD_SYNC
@@ -774,7 +779,7 @@ ENDPROC(el0_sync)
el0_irq:
kernel_entry 0
el0_irq_naked:
- enable_dbg
+ enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
@@ -788,12 +793,34 @@ el0_irq_naked:
b ret_to_user
ENDPROC(el0_irq)
+el1_error:
+ kernel_entry 1
+ mrs x1, esr_el1
+ enable_dbg
+ mov x0, sp
+ bl do_serror
+ kernel_exit 1
+ENDPROC(el1_error)
+
+el0_error:
+ kernel_entry 0
+el0_error_naked:
+ mrs x1, esr_el1
+ enable_dbg
+ mov x0, sp
+ bl do_serror
+ enable_daif
+ ct_user_exit
+ b ret_to_user
+ENDPROC(el0_error)
+
+
/*
* This is the fast syscall return path. We do as little as possible here,
* and this includes saving x0 back into the kernel stack.
*/
ret_fast_syscall:
- disable_irq // disable interrupts
+ disable_daif
str x0, [sp, #S_X0] // returned x0
ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK
@@ -803,7 +830,7 @@ ret_fast_syscall:
enable_step_tsk x1, x2
kernel_exit 0
ret_fast_syscall_trace:
- enable_irq // enable interrupts
+ enable_daif
b __sys_trace_return_skipped // we already saved x0
/*
@@ -821,7 +848,7 @@ work_pending:
* "slow" syscall return path.
*/
ret_to_user:
- disable_irq // disable interrupts
+ disable_daif
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
@@ -835,16 +862,37 @@ ENDPROC(ret_to_user)
*/
.align 6
el0_svc:
+ ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags
adrp stbl, sys_call_table // load syscall table pointer
mov wscno, w8 // syscall number in w8
mov wsc_nr, #__NR_syscalls
+
+#ifdef CONFIG_ARM64_SVE
+alternative_if_not ARM64_SVE
+ b el0_svc_naked
+alternative_else_nop_endif
+ tbz x16, #TIF_SVE, el0_svc_naked // Skip unless TIF_SVE set:
+ bic x16, x16, #_TIF_SVE // discard SVE state
+ str x16, [tsk, #TSK_TI_FLAGS]
+
+ /*
+ * task_fpsimd_load() won't be called to update CPACR_EL1 in
+ * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
+ * happens if a context switch or kernel_neon_begin() or context
+ * modification (sigreturn, ptrace) intervenes.
+ * So, ensure that CPACR_EL1 is already correct for the fast-path case:
+ */
+ mrs x9, cpacr_el1
+ bic x9, x9, #CPACR_EL1_ZEN_EL0EN // disable SVE for el0
+ msr cpacr_el1, x9 // synchronised by eret to el0
+#endif
+
el0_svc_naked: // compat entry point
stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
- enable_dbg_and_irq
+ enable_daif
ct_user_exit 1
- ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
- tst x16, #_TIF_SYSCALL_WORK
+ tst x16, #_TIF_SYSCALL_WORK // check for syscall hooks
b.ne __sys_trace
cmp wscno, wsc_nr // check upper syscall limit
b.hs ni_sys
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5d547deb6996..143b3e72c25e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,19 +17,34 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitmap.h>
#include <linux/bottom_half.h>
+#include <linux/bug.h>
+#include <linux/cache.h>
+#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/prctl.h>
#include <linux/preempt.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
#include <linux/sched/signal.h>
+#include <linux/sched/task_stack.h>
#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
#include <asm/fpsimd.h>
#include <asm/cputype.h>
#include <asm/simd.h>
+#include <asm/sigcontext.h>
+#include <asm/sysreg.h>
+#include <asm/traps.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -39,6 +54,8 @@
#define FPEXC_IDF (1 << 7)
/*
+ * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
+ *
* In order to reduce the number of times the FPSIMD state is needlessly saved
* and restored, we need to keep track of two things:
* (a) for each task, we need to remember which CPU was the last one to have
@@ -99,10 +116,741 @@
*/
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+/* Default VL for tasks that don't set it explicitly: */
+static int sve_default_vl = -1;
+
+#ifdef CONFIG_ARM64_SVE
+
+/* Maximum supported vector length across all CPUs (initially poisoned) */
+int __ro_after_init sve_max_vl = -1;
+/* Set of available vector lengths, as vq_to_bit(vq): */
+static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+static void __percpu *efi_sve_state;
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Dummy declaration for code that will be optimised out: */
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern void __percpu *efi_sve_state;
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+/*
+ * Call __sve_free() directly only if you know task can't be scheduled
+ * or preempted.
+ */
+static void __sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
+}
+
+static void sve_free(struct task_struct *task)
+{
+ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+ __sve_free(task);
+}
+
+
+/* Offset of FFR in the SVE register dump */
+static size_t sve_ffr_offset(int vl)
+{
+ return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+}
+
+static void *sve_pffr(struct task_struct *task)
+{
+ return (char *)task->thread.sve_state +
+ sve_ffr_offset(task->thread.sve_vl);
+}
+
+static void change_cpacr(u64 val, u64 mask)
+{
+ u64 cpacr = read_sysreg(CPACR_EL1);
+ u64 new = (cpacr & ~mask) | val;
+
+ if (new != cpacr)
+ write_sysreg(new, CPACR_EL1);
+}
+
+static void sve_user_disable(void)
+{
+ change_cpacr(0, CPACR_EL1_ZEN_EL0EN);
+}
+
+static void sve_user_enable(void)
+{
+ change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN);
+}
+
+/*
+ * TIF_SVE controls whether a task can use SVE without trapping while
+ * in userspace, and also the way a task's FPSIMD/SVE state is stored
+ * in thread_struct.
+ *
+ * The kernel uses this flag to track whether a user task is actively
+ * using SVE, and therefore whether full SVE register state needs to
+ * be tracked. If not, the cheaper FPSIMD context handling code can
+ * be used instead of the more costly SVE equivalents.
+ *
+ * * TIF_SVE set:
+ *
+ * The task can execute SVE instructions while in userspace without
+ * trapping to the kernel.
+ *
+ * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
+ * corresponding Zn), P0-P15 and FFR are encoded in in
+ * task->thread.sve_state, formatted appropriately for vector
+ * length task->thread.sve_vl.
+ *
+ * task->thread.sve_state must point to a valid buffer at least
+ * sve_state_size(task) bytes in size.
+ *
+ * During any syscall, the kernel may optionally clear TIF_SVE and
+ * discard the vector state except for the FPSIMD subset.
+ *
+ * * TIF_SVE clear:
+ *
+ * An attempt by the user task to execute an SVE instruction causes
+ * do_sve_acc() to be called, which does some preparation and then
+ * sets TIF_SVE.
+ *
+ * When stored, FPSIMD registers V0-V31 are encoded in
+ * task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ * logically zero but not stored anywhere; P0-P15 and FFR are not
+ * stored and have unspecified values from userspace's point of
+ * view. For hygiene purposes, the kernel zeroes them on next use,
+ * but userspace is discouraged from relying on this.
+ *
+ * task->thread.sve_state does not need to be non-NULL, valid or any
+ * particular size: it must not be dereferenced.
+ *
+ * * FPSR and FPCR are always stored in task->fpsimd_state irrespctive of
+ * whether TIF_SVE is clear or set, since these are not vector length
+ * dependent.
+ */
+
+/*
+ * Update current's FPSIMD/SVE registers from thread_struct.
+ *
+ * This function should be called only when the FPSIMD/SVE state in
+ * thread_struct is known to be up to date, when preparing to enter
+ * userspace.
+ *
+ * Softirqs (and preemption) must be disabled.
+ */
+static void task_fpsimd_load(void)
+{
+ WARN_ON(!in_softirq() && !irqs_disabled());
+
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ sve_load_state(sve_pffr(current),
+ &current->thread.fpsimd_state.fpsr,
+ sve_vq_from_vl(current->thread.sve_vl) - 1);
+ else
+ fpsimd_load_state(&current->thread.fpsimd_state);
+
+ if (system_supports_sve()) {
+ /* Toggle SVE trapping for userspace if needed */
+ if (test_thread_flag(TIF_SVE))
+ sve_user_enable();
+ else
+ sve_user_disable();
+
+ /* Serialised by exception return to user */
+ }
+}
+
+/*
+ * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
+ * with respect to the CPU registers.
+ *
+ * Softirqs (and preemption) must be disabled.
+ */
+static void task_fpsimd_save(void)
+{
+ WARN_ON(!in_softirq() && !irqs_disabled());
+
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
+ if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
+ /*
+ * Can't save the user regs, so current would
+ * re-enter user with corrupt state.
+ * There's no way to recover, so kill it:
+ */
+ force_signal_inject(
+ SIGKILL, 0, current_pt_regs(), 0);
+ return;
+ }
+
+ sve_save_state(sve_pffr(current),
+ &current->thread.fpsimd_state.fpsr);
+ } else
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ }
+}
+
+/*
+ * Helpers to translate bit indices in sve_vq_map to VQ values (and
+ * vice versa). This allows find_next_bit() to be used to find the
+ * _maximum_ VQ not exceeding a certain value.
+ */
+
+static unsigned int vq_to_bit(unsigned int vq)
+{
+ return SVE_VQ_MAX - vq;
+}
+
+static unsigned int bit_to_vq(unsigned int bit)
+{
+ if (WARN_ON(bit >= SVE_VQ_MAX))
+ bit = SVE_VQ_MAX - 1;
+
+ return SVE_VQ_MAX - bit;
+}
+
+/*
+ * All vector length selection from userspace comes through here.
+ * We're on a slow path, so some sanity-checks are included.
+ * If things go wrong there's a bug somewhere, but try to fall back to a
+ * safe choice.
+ */
+static unsigned int find_supported_vector_length(unsigned int vl)
+{
+ int bit;
+ int max_vl = sve_max_vl;
+
+ if (WARN_ON(!sve_vl_valid(vl)))
+ vl = SVE_VL_MIN;
+
+ if (WARN_ON(!sve_vl_valid(max_vl)))
+ max_vl = SVE_VL_MIN;
+
+ if (vl > max_vl)
+ vl = max_vl;
+
+ bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
+ vq_to_bit(sve_vq_from_vl(vl)));
+ return sve_vl_from_vq(bit_to_vq(bit));
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int sve_proc_do_default_vl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ int vl = sve_default_vl;
+ struct ctl_table tmp_table = {
+ .data = &vl,
+ .maxlen = sizeof(vl),
+ };
+
+ ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
+ /* Writing -1 has the special meaning "set to max": */
+ if (vl == -1) {
+ /* Fail safe if sve_max_vl wasn't initialised */
+ if (WARN_ON(!sve_vl_valid(sve_max_vl)))
+ vl = SVE_VL_MIN;
+ else
+ vl = sve_max_vl;
+
+ goto chosen;
+ }
+
+ if (!sve_vl_valid(vl))
+ return -EINVAL;
+
+ vl = find_supported_vector_length(vl);
+chosen:
+ sve_default_vl = vl;
+ return 0;
+}
+
+static struct ctl_table sve_default_vl_table[] = {
+ {
+ .procname = "sve_default_vector_length",
+ .mode = 0644,
+ .proc_handler = sve_proc_do_default_vl,
+ },
+ { }
+};
+
+static int __init sve_sysctl_init(void)
+{
+ if (system_supports_sve())
+ if (!register_sysctl("abi", sve_default_vl_table))
+ return -EINVAL;
+
+ return 0;
+}
+
+#else /* ! CONFIG_SYSCTL */
+static int __init sve_sysctl_init(void) { return 0; }
+#endif /* ! CONFIG_SYSCTL */
+
+#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
+ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
+
+/*
+ * Transfer the FPSIMD state in task->thread.fpsimd_state to
+ * task->thread.sve_state.
+ *
+ * Task can be a non-runnable task, or current. In the latter case,
+ * softirqs (and preemption) must be disabled.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.fpsimd_state must be up to date before calling this function.
+ */
+static void fpsimd_to_sve(struct task_struct *task)
+{
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+ struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+ return;
+
+ vq = sve_vq_from_vl(task->thread.sve_vl);
+ for (i = 0; i < 32; ++i)
+ memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+ sizeof(fst->vregs[i]));
+}
+
+/*
+ * Transfer the SVE state in task->thread.sve_state to
+ * task->thread.fpsimd_state.
+ *
+ * Task can be a non-runnable task, or current. In the latter case,
+ * softirqs (and preemption) must be disabled.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.sve_state must be up to date before calling this function.
+ */
+static void sve_to_fpsimd(struct task_struct *task)
+{
+ unsigned int vq;
+ void const *sst = task->thread.sve_state;
+ struct fpsimd_state *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!system_supports_sve())
+ return;
+
+ vq = sve_vq_from_vl(task->thread.sve_vl);
+ for (i = 0; i < 32; ++i)
+ memcpy(&fst->vregs[i], ZREG(sst, vq, i),
+ sizeof(fst->vregs[i]));
+}
+
+#ifdef CONFIG_ARM64_SVE
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+size_t sve_state_size(struct task_struct const *task)
+{
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
+}
+
+/*
+ * Ensure that task->thread.sve_state is allocated and sufficiently large.
+ *
+ * This function should be used only in preparation for replacing
+ * task->thread.sve_state with new data. The memory is always zeroed
+ * here to prevent stale data from showing through: this is done in
+ * the interest of testability and predictability: except in the
+ * do_sve_acc() case, there is no ABI requirement to hide stale data
+ * written previously be task.
+ */
+void sve_alloc(struct task_struct *task)
+{
+ if (task->thread.sve_state) {
+ memset(task->thread.sve_state, 0, sve_state_size(current));
+ return;
+ }
+
+ /* This is a small allocation (maximum ~8KB) and Should Not Fail. */
+ task->thread.sve_state =
+ kzalloc(sve_state_size(task), GFP_KERNEL);
+
+ /*
+ * If future SVE revisions can have larger vectors though,
+ * this may cease to be true:
+ */
+ BUG_ON(!task->thread.sve_state);
+}
+
+
+/*
+ * Ensure that task->thread.sve_state is up to date with respect to
+ * the user task, irrespective of when SVE is in use or not.
+ *
+ * This should only be called by ptrace. task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void fpsimd_sync_to_sve(struct task_struct *task)
+{
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+ fpsimd_to_sve(task);
+}
+
+/*
+ * Ensure that task->thread.fpsimd_state is up to date with respect to
+ * the user task, irrespective of whether SVE is in use or not.
+ *
+ * This should only be called by ptrace. task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void sve_sync_to_fpsimd(struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_SVE))
+ sve_to_fpsimd(task);
+}
+
+/*
+ * Ensure that task->thread.sve_state is up to date with respect to
+ * the task->thread.fpsimd_state.
+ *
+ * This should only be called by ptrace to merge new FPSIMD register
+ * values into a task for which SVE is currently active.
+ * task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.fpsimd_state must already have been initialised with
+ * the new FPSIMD register values to be merged in.
+ */
+void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+{
+ unsigned int vq;
+ void *sst = task->thread.sve_state;
+ struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+ return;
+
+ vq = sve_vq_from_vl(task->thread.sve_vl);
+
+ memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
+
+ for (i = 0; i < 32; ++i)
+ memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+ sizeof(fst->vregs[i]));
+}
+
+int sve_set_vector_length(struct task_struct *task,
+ unsigned long vl, unsigned long flags)
+{
+ if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
+ PR_SVE_SET_VL_ONEXEC))
+ return -EINVAL;
+
+ if (!sve_vl_valid(vl))
+ return -EINVAL;
+
+ /*
+ * Clamp to the maximum vector length that VL-agnostic SVE code can
+ * work with. A flag may be assigned in the future to allow setting
+ * of larger vector lengths without confusing older software.
+ */
+ if (vl > SVE_VL_ARCH_MAX)
+ vl = SVE_VL_ARCH_MAX;
+
+ vl = find_supported_vector_length(vl);
+
+ if (flags & (PR_SVE_VL_INHERIT |
+ PR_SVE_SET_VL_ONEXEC))
+ task->thread.sve_vl_onexec = vl;
+ else
+ /* Reset VL to system default on next exec: */
+ task->thread.sve_vl_onexec = 0;
+
+ /* Only actually set the VL if not deferred: */
+ if (flags & PR_SVE_SET_VL_ONEXEC)
+ goto out;
+
+ if (vl == task->thread.sve_vl)
+ goto out;
+
+ /*
+ * To ensure the FPSIMD bits of the SVE vector registers are preserved,
+ * write any live register state back to task_struct, and convert to a
+ * non-SVE thread.
+ */
+ if (task == current) {
+ local_bh_disable();
+
+ task_fpsimd_save();
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+ }
+
+ fpsimd_flush_task_state(task);
+ if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+ sve_to_fpsimd(task);
+
+ if (task == current)
+ local_bh_enable();
+
+ /*
+ * Force reallocation of task SVE state to the correct size
+ * on next use:
+ */
+ sve_free(task);
+
+ task->thread.sve_vl = vl;
+
+out:
+ if (flags & PR_SVE_VL_INHERIT)
+ set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+ else
+ clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+
+ return 0;
+}
+
+/*
+ * Encode the current vector length and flags for return.
+ * This is only required for prctl(): ptrace has separate fields
+ *
+ * flags are as for sve_set_vector_length().
+ */
+static int sve_prctl_status(unsigned long flags)
+{
+ int ret;
+
+ if (flags & PR_SVE_SET_VL_ONEXEC)
+ ret = current->thread.sve_vl_onexec;
+ else
+ ret = current->thread.sve_vl;
+
+ if (test_thread_flag(TIF_SVE_VL_INHERIT))
+ ret |= PR_SVE_VL_INHERIT;
+
+ return ret;
+}
+
+/* PR_SVE_SET_VL */
+int sve_set_current_vl(unsigned long arg)
+{
+ unsigned long vl, flags;
+ int ret;
+
+ vl = arg & PR_SVE_VL_LEN_MASK;
+ flags = arg & ~vl;
+
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ ret = sve_set_vector_length(current, vl, flags);
+ if (ret)
+ return ret;
+
+ return sve_prctl_status(flags);
+}
+
+/* PR_SVE_GET_VL */
+int sve_get_current_vl(void)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ return sve_prctl_status(0);
+}
+
+/*
+ * Bitmap for temporary storage of the per-CPU set of supported vector lengths
+ * during secondary boot.
+ */
+static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
+
+static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
+{
+ unsigned int vq, vl;
+ unsigned long zcr;
+
+ bitmap_zero(map, SVE_VQ_MAX);
+
+ zcr = ZCR_ELx_LEN_MASK;
+ zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
+
+ for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
+ write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
+ vl = sve_get_vl();
+ vq = sve_vq_from_vl(vl); /* skip intervening lengths */
+ set_bit(vq_to_bit(vq), map);
+ }
+}
+
+void __init sve_init_vq_map(void)
+{
+ sve_probe_vqs(sve_vq_map);
+}
+
+/*
+ * If we haven't committed to the set of supported VQs yet, filter out
+ * those not supported by the current CPU.
+ */
+void sve_update_vq_map(void)
+{
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+}
+
+/* Check whether the current CPU supports all VQs in the committed set */
+int sve_verify_vq_map(void)
+{
+ int ret = 0;
+
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
+ SVE_VQ_MAX);
+ if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
+ pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
+ smp_processor_id());
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void __init sve_efi_setup(void)
+{
+ if (!IS_ENABLED(CONFIG_EFI))
+ return;
+
+ /*
+ * alloc_percpu() warns and prints a backtrace if this goes wrong.
+ * This is evidence of a crippled system and we are returning void,
+ * so no attempt is made to handle this situation here.
+ */
+ if (!sve_vl_valid(sve_max_vl))
+ goto fail;
+
+ efi_sve_state = __alloc_percpu(
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
+ if (!efi_sve_state)
+ goto fail;
+
+ return;
+
+fail:
+ panic("Cannot allocate percpu memory for EFI SVE save/restore");
+}
+
+/*
+ * Enable SVE for EL1.
+ * Intended for use by the cpufeatures code during CPU boot.
+ */
+int sve_kernel_enable(void *__always_unused p)
+{
+ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
+ isb();
+
+ return 0;
+}
+
+void __init sve_setup(void)
+{
+ u64 zcr;
+
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The SVE architecture mandates support for 128-bit vectors,
+ * so sve_vq_map must have at least SVE_VQ_MIN set.
+ * If something went wrong, at least try to patch it up:
+ */
+ if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
+ set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
+
+ zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+ sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
+
+ /*
+ * Sanity-check that the max VL we determined through CPU features
+ * corresponds properly to sve_vq_map. If not, do our best:
+ */
+ if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
+ sve_max_vl = find_supported_vector_length(sve_max_vl);
+
+ /*
+ * For the default VL, pick the maximum supported value <= 64.
+ * VL == 64 is guaranteed not to grow the signal frame.
+ */
+ sve_default_vl = find_supported_vector_length(64);
+
+ pr_info("SVE: maximum available vector length %u bytes per vector\n",
+ sve_max_vl);
+ pr_info("SVE: default vector length %u bytes per vector\n",
+ sve_default_vl);
+
+ sve_efi_setup();
+}
+
+/*
+ * Called from the put_task_struct() path, which cannot get here
+ * unless dead_task is really dead and not schedulable.
+ */
+void fpsimd_release_task(struct task_struct *dead_task)
+{
+ __sve_free(dead_task);
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
+/*
+ * Trapped SVE access
+ *
+ * Storage is allocated for the full SVE state, the current FPSIMD
+ * register contents are migrated across, and TIF_SVE is set so that
+ * the SVE access trap will be disabled the next time this task
+ * reaches ret_to_user.
+ *
+ * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
+ * would have disabled the SVE access trap for userspace during
+ * ret_to_user, making an SVE access trap impossible in that case.
+ */
+asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+{
+ /* Even if we chose not to use SVE, the hardware could still trap: */
+ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+ return;
+ }
+
+ sve_alloc(current);
+
+ local_bh_disable();
+
+ task_fpsimd_save();
+ fpsimd_to_sve(current);
+
+ /* Force ret_to_user to reload the registers: */
+ fpsimd_flush_task_state(current);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+
+ if (test_and_set_thread_flag(TIF_SVE))
+ WARN_ON(1); /* SVE access shouldn't have trapped */
+
+ local_bh_enable();
+}
+
/*
* Trapped FP/ASIMD access.
*/
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
@@ -111,7 +859,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
/*
* Raise a SIGFPE for the current process.
*/
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
siginfo_t info;
unsigned int si_code = 0;
@@ -144,8 +892,8 @@ void fpsimd_thread_switch(struct task_struct *next)
* the registers is in fact the most recent userland FPSIMD state of
* 'current'.
*/
- if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
+ if (current->mm)
+ task_fpsimd_save();
if (next->mm) {
/*
@@ -159,16 +907,16 @@ void fpsimd_thread_switch(struct task_struct *next)
if (__this_cpu_read(fpsimd_last_state) == st
&& st->cpu == smp_processor_id())
- clear_ti_thread_flag(task_thread_info(next),
- TIF_FOREIGN_FPSTATE);
+ clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
else
- set_ti_thread_flag(task_thread_info(next),
- TIF_FOREIGN_FPSTATE);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
}
}
void fpsimd_flush_thread(void)
{
+ int vl, supported_vl;
+
if (!system_supports_fpsimd())
return;
@@ -176,6 +924,42 @@ void fpsimd_flush_thread(void)
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
+
+ if (system_supports_sve()) {
+ clear_thread_flag(TIF_SVE);
+ sve_free(current);
+
+ /*
+ * Reset the task vector length as required.
+ * This is where we ensure that all user tasks have a valid
+ * vector length configured: no kernel task can become a user
+ * task without an exec and hence a call to this function.
+ * By the time the first call to this function is made, all
+ * early hardware probing is complete, so sve_default_vl
+ * should be valid.
+ * If a bug causes this to go wrong, we make some noise and
+ * try to fudge thread.sve_vl to a safe value here.
+ */
+ vl = current->thread.sve_vl_onexec ?
+ current->thread.sve_vl_onexec : sve_default_vl;
+
+ if (WARN_ON(!sve_vl_valid(vl)))
+ vl = SVE_VL_MIN;
+
+ supported_vl = find_supported_vector_length(vl);
+ if (WARN_ON(supported_vl != vl))
+ vl = supported_vl;
+
+ current->thread.sve_vl = vl;
+
+ /*
+ * If the task is not set to inherit, ensure that the vector
+ * length will be reset by a subsequent exec:
+ */
+ if (!test_thread_flag(TIF_SVE_VL_INHERIT))
+ current->thread.sve_vl_onexec = 0;
+ }
+
set_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
@@ -191,14 +975,23 @@ void fpsimd_preserve_current_state(void)
return;
local_bh_disable();
-
- if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
-
+ task_fpsimd_save();
local_bh_enable();
}
/*
+ * Like fpsimd_preserve_current_state(), but ensure that
+ * current->thread.fpsimd_state is updated so that it can be copied to
+ * the signal frame.
+ */
+void fpsimd_signal_preserve_current_state(void)
+{
+ fpsimd_preserve_current_state();
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ sve_to_fpsimd(current);
+}
+
+/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
* state of 'current'
@@ -213,7 +1006,7 @@ void fpsimd_restore_current_state(void)
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
- fpsimd_load_state(st);
+ task_fpsimd_load();
__this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
@@ -233,7 +1026,12 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
local_bh_disable();
- fpsimd_load_state(state);
+ if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
+ current->thread.fpsimd_state = *state;
+ fpsimd_to_sve(current);
+ }
+ task_fpsimd_load();
+
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -252,6 +1050,33 @@ void fpsimd_flush_task_state(struct task_struct *t)
t->thread.fpsimd_state.cpu = NR_CPUS;
}
+static inline void fpsimd_flush_cpu_state(void)
+{
+ __this_cpu_write(fpsimd_last_state, NULL);
+}
+
+/*
+ * Invalidate any task SVE state currently held in this CPU's regs.
+ *
+ * This is used to prevent the kernel from trying to reuse SVE register data
+ * that is detroyed by KVM guest enter/exit. This function should go away when
+ * KVM SVE support is implemented. Don't use it for anything else.
+ */
+#ifdef CONFIG_ARM64_SVE
+void sve_flush_cpu_state(void)
+{
+ struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state);
+ struct task_struct *tsk;
+
+ if (!fpstate)
+ return;
+
+ tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state);
+ if (test_tsk_thread_flag(tsk, TIF_SVE))
+ fpsimd_flush_cpu_state();
+}
+#endif /* CONFIG_ARM64_SVE */
+
#ifdef CONFIG_KERNEL_MODE_NEON
DEFINE_PER_CPU(bool, kernel_neon_busy);
@@ -286,11 +1111,13 @@ void kernel_neon_begin(void)
__this_cpu_write(kernel_neon_busy, true);
/* Save unsaved task fpsimd state, if any: */
- if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
+ if (current->mm) {
+ task_fpsimd_save();
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
- __this_cpu_write(fpsimd_last_state, NULL);
+ fpsimd_flush_cpu_state();
preempt_disable();
@@ -325,6 +1152,7 @@ EXPORT_SYMBOL(kernel_neon_end);
static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
+static DEFINE_PER_CPU(bool, efi_sve_state_used);
/*
* EFI runtime services support functions
@@ -350,10 +1178,24 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
- if (may_use_simd())
+ if (may_use_simd()) {
kernel_neon_begin();
- else {
- fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+ } else {
+ /*
+ * If !efi_sve_state, SVE can't be in use yet and doesn't need
+ * preserving:
+ */
+ if (system_supports_sve() && likely(efi_sve_state)) {
+ char *sve_state = this_cpu_ptr(efi_sve_state);
+
+ __this_cpu_write(efi_sve_state_used, true);
+
+ sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
+ &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
+ } else {
+ fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+ }
+
__this_cpu_write(efi_fpsimd_state_used, true);
}
}
@@ -366,10 +1208,22 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (__this_cpu_xchg(efi_fpsimd_state_used, false))
- fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
- else
+ if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
kernel_neon_end();
+ } else {
+ if (system_supports_sve() &&
+ likely(__this_cpu_read(efi_sve_state_used))) {
+ char const *sve_state = this_cpu_ptr(efi_sve_state);
+
+ sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
+ &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
+ sve_vq_from_vl(sve_get_vl()) - 1);
+
+ __this_cpu_write(efi_sve_state_used, false);
+ } else {
+ fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+ }
+ }
}
#endif /* CONFIG_EFI */
@@ -382,9 +1236,9 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
- this_cpu_write(fpsimd_last_state, NULL);
+ if (current->mm)
+ task_fpsimd_save();
+ fpsimd_flush_cpu_state();
break;
case CPU_PM_EXIT:
if (current->mm)
@@ -442,6 +1296,6 @@ static int __init fpsimd_init(void)
if (!(elf_hwcap & HWCAP_ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
- return 0;
+ return sve_sysctl_init();
}
core_initcall(fpsimd_init);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0b243ecaf7ac..67e86a0f57ac 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -480,14 +480,21 @@ set_hcr:
/* Statistical profiling */
ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer
- cbz x0, 6f // Skip if SPE not present
- cbnz x2, 5f // VHE?
+ cbz x0, 7f // Skip if SPE not present
+ cbnz x2, 6f // VHE?
+ mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
+ and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+ cbnz x4, 5f // then permit sampling of physical
+ mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
+ 1 << SYS_PMSCR_EL2_PA_SHIFT)
+ msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
+5:
mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x3, x3, x1 // If we don't have VHE, then
- b 6f // use EL1&0 translation.
-5: // For VHE, use EL2 translation
+ b 7f // use EL1&0 translation.
+6: // For VHE, use EL2 translation
orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
-6:
+7:
msr mdcr_el2, x3 // Configure debug traps
/* Stage-2 translation */
@@ -517,8 +524,19 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
mov x0, #0x33ff
msr cptr_el2, x0 // Disable copro. traps to EL2
+ /* SVE register access */
+ mrs x1, id_aa64pfr0_el1
+ ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
+ cbz x1, 7f
+
+ bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
/* Hypervisor stub */
- adr_l x0, __hyp_stub_vectors
+7: adr_l x0, __hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 095d3c170f5d..3009b8b80f08 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -27,6 +27,7 @@
#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
+#include <asm/daifflags.h>
#include <asm/irqflags.h>
#include <asm/kexec.h>
#include <asm/memory.h>
@@ -285,7 +286,7 @@ int swsusp_arch_suspend(void)
return -EBUSY;
}
- local_dbg_save(flags);
+ flags = local_daif_save();
if (__cpu_suspend_enter(&state)) {
/* make the crash dump kernel image visible/saveable */
@@ -315,7 +316,7 @@ int swsusp_arch_suspend(void)
__cpu_suspend_exit();
}
- local_dbg_restore(flags);
+ local_daif_restore(flags);
return ret;
}
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 354be2a872ae..79b17384effa 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -25,8 +25,7 @@
*/
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
- while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
- !IS_ALIGNED((unsigned long)to, 8))) {
+ while (count && !IS_ALIGNED((unsigned long)from, 8)) {
*(u8 *)to = __raw_readb(from);
from++;
to++;
@@ -54,23 +53,22 @@ EXPORT_SYMBOL(__memcpy_fromio);
*/
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
- while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
- !IS_ALIGNED((unsigned long)from, 8))) {
- __raw_writeb(*(volatile u8 *)from, to);
+ while (count && !IS_ALIGNED((unsigned long)to, 8)) {
+ __raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
}
while (count >= 8) {
- __raw_writeq(*(volatile u64 *)from, to);
+ __raw_writeq(*(u64 *)from, to);
from += 8;
to += 8;
count -= 8;
}
while (count) {
- __raw_writeb(*(volatile u8 *)from, to);
+ __raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 11121f608eb5..f76ea92dff91 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
+#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -195,8 +196,7 @@ void machine_kexec(struct kimage *kimage)
pr_info("Bye!\n");
- /* Disable all DAIF exceptions. */
- asm volatile ("msr daifset, #0xf" : : : "memory");
+ local_daif_mask();
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2dc0f8482210..b2adcce7bc18 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -49,6 +49,7 @@
#include <linux/notifier.h>
#include <trace/events/power.h>
#include <linux/percpu.h>
+#include <linux/thread_info.h>
#include <asm/alternative.h>
#include <asm/compat.h>
@@ -170,6 +171,39 @@ void machine_restart(char *cmd)
while (1);
}
+static void print_pstate(struct pt_regs *regs)
+{
+ u64 pstate = regs->pstate;
+
+ if (compat_user_mode(regs)) {
+ printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
+ pstate,
+ pstate & COMPAT_PSR_N_BIT ? 'N' : 'n',
+ pstate & COMPAT_PSR_Z_BIT ? 'Z' : 'z',
+ pstate & COMPAT_PSR_C_BIT ? 'C' : 'c',
+ pstate & COMPAT_PSR_V_BIT ? 'V' : 'v',
+ pstate & COMPAT_PSR_Q_BIT ? 'Q' : 'q',
+ pstate & COMPAT_PSR_T_BIT ? "T32" : "A32",
+ pstate & COMPAT_PSR_E_BIT ? "BE" : "LE",
+ pstate & COMPAT_PSR_A_BIT ? 'A' : 'a',
+ pstate & COMPAT_PSR_I_BIT ? 'I' : 'i',
+ pstate & COMPAT_PSR_F_BIT ? 'F' : 'f');
+ } else {
+ printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+ pstate,
+ pstate & PSR_N_BIT ? 'N' : 'n',
+ pstate & PSR_Z_BIT ? 'Z' : 'z',
+ pstate & PSR_C_BIT ? 'C' : 'c',
+ pstate & PSR_V_BIT ? 'V' : 'v',
+ pstate & PSR_D_BIT ? 'D' : 'd',
+ pstate & PSR_A_BIT ? 'A' : 'a',
+ pstate & PSR_I_BIT ? 'I' : 'i',
+ pstate & PSR_F_BIT ? 'F' : 'f',
+ pstate & PSR_PAN_BIT ? '+' : '-',
+ pstate & PSR_UAO_BIT ? '+' : '-');
+ }
+}
+
void __show_regs(struct pt_regs *regs)
{
int i, top_reg;
@@ -186,10 +220,9 @@ void __show_regs(struct pt_regs *regs)
}
show_regs_print_info(KERN_DEFAULT);
- print_symbol("PC is at %s\n", instruction_pointer(regs));
- print_symbol("LR is at %s\n", lr);
- printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
- regs->pc, lr, regs->pstate);
+ print_pstate(regs);
+ print_symbol("pc : %s\n", regs->pc);
+ print_symbol("lr : %s\n", lr);
printk("sp : %016llx\n", sp);
i = top_reg;
@@ -241,11 +274,27 @@ void release_thread(struct task_struct *dead_task)
{
}
+void arch_release_task_struct(struct task_struct *tsk)
+{
+ fpsimd_release_task(tsk);
+}
+
+/*
+ * src and dst may temporarily have aliased sve_state after task_struct
+ * is copied. We cannot fix this properly here, because src may have
+ * live SVE state and dst's thread_info may not exist yet, so tweaking
+ * either src's or dst's TIF_SVE is not safe.
+ *
+ * The unaliasing is done in copy_thread() instead. This works because
+ * dst is not schedulable or traceable until both of these functions
+ * have been called.
+ */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
*dst = *src;
+
return 0;
}
@@ -258,6 +307,13 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
+ /*
+ * Unalias p->thread.sve_state (if any) from the parent task
+ * and disable discard SVE state for p:
+ */
+ clear_tsk_thread_flag(p, TIF_SVE);
+ p->thread.sve_state = NULL;
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9cbb6123208f..7c44658b316d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -32,6 +32,7 @@
#include <linux/security.h>
#include <linux/init.h>
#include <linux/signal.h>
+#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
@@ -40,6 +41,7 @@
#include <linux/elf.h>
#include <asm/compat.h>
+#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable.h>
#include <asm/stacktrace.h>
@@ -618,17 +620,56 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
/*
* TODO: update fp accessors for lazy context switching (sync/flush hwstate)
*/
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+static int __fpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf, unsigned int start_pos)
{
struct user_fpsimd_state *uregs;
+
+ sve_sync_to_fpsimd(target);
+
uregs = &target->thread.fpsimd_state.user_fpsimd;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ start_pos, start_pos + sizeof(*uregs));
+}
+
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
if (target == current)
fpsimd_preserve_current_state();
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+ return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
+static int __fpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned int start_pos)
+{
+ int ret;
+ struct user_fpsimd_state newstate;
+
+ /*
+ * Ensure target->thread.fpsimd_state is up to date, so that a
+ * short copyin can't resurrect stale data.
+ */
+ sve_sync_to_fpsimd(target);
+
+ newstate = target->thread.fpsimd_state.user_fpsimd;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+ start_pos, start_pos + sizeof(newstate));
+ if (ret)
+ return ret;
+
+ target->thread.fpsimd_state.user_fpsimd = newstate;
+
+ return ret;
}
static int fpr_set(struct task_struct *target, const struct user_regset *regset,
@@ -636,15 +677,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_fpsimd_state newstate =
- target->thread.fpsimd_state.user_fpsimd;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
if (ret)
return ret;
- target->thread.fpsimd_state.user_fpsimd = newstate;
+ sve_sync_from_fpsimd_zeropad(target);
fpsimd_flush_task_state(target);
+
return ret;
}
@@ -702,6 +742,215 @@ static int system_call_set(struct task_struct *target,
return ret;
}
+#ifdef CONFIG_ARM64_SVE
+
+static void sve_init_header_from_task(struct user_sve_header *header,
+ struct task_struct *target)
+{
+ unsigned int vq;
+
+ memset(header, 0, sizeof(*header));
+
+ header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
+ SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
+ if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+
+ header->vl = target->thread.sve_vl;
+ vq = sve_vq_from_vl(header->vl);
+
+ header->max_vl = sve_max_vl;
+ if (WARN_ON(!sve_vl_valid(sve_max_vl)))
+ header->max_vl = header->vl;
+
+ header->size = SVE_PT_SIZE(vq, header->flags);
+ header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
+ SVE_PT_REGS_SVE);
+}
+
+static unsigned int sve_size_from_header(struct user_sve_header const *header)
+{
+ return ALIGN(header->size, SVE_VQ_BYTES);
+}
+
+static unsigned int sve_get_size(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ struct user_sve_header header;
+
+ if (!system_supports_sve())
+ return 0;
+
+ sve_init_header_from_task(&header, target);
+ return sve_size_from_header(&header);
+}
+
+static int sve_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+ struct user_sve_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ /* Header */
+ sve_init_header_from_task(&header, target);
+ vq = sve_vq_from_vl(header.vl);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ return ret;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
+ /* Registers: FPSIMD-only case */
+
+ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+ if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+ return __fpr_get(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+
+ /* Otherwise: full SVE case */
+
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ target->thread.sve_state,
+ start, end);
+ if (ret)
+ return ret;
+
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ start, end);
+ if (ret)
+ return ret;
+
+ /*
+ * Copy fpsr, and fpcr which must follow contiguously in
+ * struct fpsimd_state:
+ */
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpsimd_state.fpsr,
+ start, end);
+ if (ret)
+ return ret;
+
+ start = end;
+ end = sve_size_from_header(&header);
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ start, end);
+}
+
+static int sve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct user_sve_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ /* Header */
+ if (count < sizeof(header))
+ return -EINVAL;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ goto out;
+
+ /*
+ * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by
+ * sve_set_vector_length(), which will also validate them for us:
+ */
+ ret = sve_set_vector_length(target, header.vl,
+ ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+ if (ret)
+ goto out;
+
+ /* Actual VL set may be less than the user asked for: */
+ vq = sve_vq_from_vl(target->thread.sve_vl);
+
+ /* Registers: FPSIMD-only case */
+
+ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+ if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+ clear_tsk_thread_flag(target, TIF_SVE);
+ goto out;
+ }
+
+ /* Otherwise: full SVE case */
+
+ /*
+ * If setting a different VL from the requested VL and there is
+ * register data, the data layout will be wrong: don't even
+ * try to set the registers in this case.
+ */
+ if (count && vq != sve_vq_from_vl(header.vl)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ sve_alloc(target);
+
+ /*
+ * Ensure target->thread.sve_state is up to date with target's
+ * FPSIMD regs, so that a short copyin leaves trailing registers
+ * unmodified.
+ */
+ fpsimd_sync_to_sve(target);
+ set_tsk_thread_flag(target, TIF_SVE);
+
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.sve_state,
+ start, end);
+ if (ret)
+ goto out;
+
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ start, end);
+ if (ret)
+ goto out;
+
+ /*
+ * Copy fpsr, and fpcr which must follow contiguously in
+ * struct fpsimd_state:
+ */
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpsimd_state.fpsr,
+ start, end);
+
+out:
+ fpsimd_flush_task_state(target);
+ return ret;
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -711,6 +960,9 @@ enum aarch64_regset {
REGSET_HW_WATCH,
#endif
REGSET_SYSTEM_CALL,
+#ifdef CONFIG_ARM64_SVE
+ REGSET_SVE,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -768,6 +1020,18 @@ static const struct user_regset aarch64_regsets[] = {
.get = system_call_get,
.set = system_call_set,
},
+#ifdef CONFIG_ARM64_SVE
+ [REGSET_SVE] = { /* Scalable Vector Extension */
+ .core_note_type = NT_ARM_SVE,
+ .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+ SVE_VQ_BYTES),
+ .size = SVE_VQ_BYTES,
+ .align = SVE_VQ_BYTES,
+ .get = sve_get,
+ .set = sve_set,
+ .get_size = sve_get_size,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index d4b740538ad5..30ad2f085d1f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -23,7 +23,6 @@
#include <linux/stddef.h>
#include <linux/ioport.h>
#include <linux/delay.h>
-#include <linux/utsname.h>
#include <linux/initrd.h>
#include <linux/console.h>
#include <linux/cache.h>
@@ -48,6 +47,7 @@
#include <asm/fixmap.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
+#include <asm/daifflags.h>
#include <asm/elf.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
@@ -103,7 +103,8 @@ void __init smp_setup_processor_id(void)
* access percpu variable inside lock_release
*/
set_my_cpu_offset(0);
- pr_info("Booting Linux on physical CPU 0x%lx\n", (unsigned long)mpidr);
+ pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
+ (unsigned long)mpidr, read_cpuid_id());
}
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
@@ -244,9 +245,6 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
{
- pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
-
- sprintf(init_utsname()->machine, UTS_MACHINE);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
@@ -262,10 +260,11 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
/*
- * Unmask asynchronous aborts after bringing up possible earlycon.
- * (Report possible System Errors once we can report this occurred)
+ * Unmask asynchronous aborts and fiq after bringing up possible
+ * earlycon. (Report possible System Errors once we can report this
+ * occurred).
*/
- local_async_enable();
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0bdc96c61bc0..b120111a46be 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -31,6 +31,7 @@
#include <linux/ratelimit.h>
#include <linux/syscalls.h>
+#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -63,6 +64,7 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long sve_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -179,9 +181,6 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
int err;
- /* dump the hardware registers to the fpsimd_state structure */
- fpsimd_preserve_current_state();
-
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
@@ -214,6 +213,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+ clear_thread_flag(TIF_SVE);
+
/* load the hardware registers from the fpsimd_state structure */
if (!err)
fpsimd_update_current_state(&fpsimd);
@@ -221,10 +222,118 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
+
struct user_ctxs {
struct fpsimd_context __user *fpsimd;
+ struct sve_context __user *sve;
};
+#ifdef CONFIG_ARM64_SVE
+
+static int preserve_sve_context(struct sve_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+ unsigned int vl = current->thread.sve_vl;
+ unsigned int vq = 0;
+
+ if (test_thread_flag(TIF_SVE))
+ vq = sve_vq_from_vl(vl);
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(SVE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
+ &ctx->head.size, err);
+ __put_user_error(vl, &ctx->vl, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ if (vq) {
+ /*
+ * This assumes that the SVE state has already been saved to
+ * the task struct by calling preserve_fpsimd_context().
+ */
+ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
+ current->thread.sve_state,
+ SVE_SIG_REGS_SIZE(vq));
+ }
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+ int err;
+ unsigned int vq;
+ struct fpsimd_state fpsimd;
+ struct sve_context sve;
+
+ if (__copy_from_user(&sve, user->sve, sizeof(sve)))
+ return -EFAULT;
+
+ if (sve.vl != current->thread.sve_vl)
+ return -EINVAL;
+
+ if (sve.head.size <= sizeof(*user->sve)) {
+ clear_thread_flag(TIF_SVE);
+ goto fpsimd_only;
+ }
+
+ vq = sve_vq_from_vl(sve.vl);
+
+ if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+ return -EINVAL;
+
+ /*
+ * Careful: we are about __copy_from_user() directly into
+ * thread.sve_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ barrier();
+ /* From now, fpsimd_thread_switch() won't clear TIF_FOREIGN_FPSTATE */
+
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+ barrier();
+ /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
+
+ sve_alloc(current);
+ err = __copy_from_user(current->thread.sve_state,
+ (char __user const *)user->sve +
+ SVE_SIG_REGS_OFFSET,
+ SVE_SIG_REGS_SIZE(vq));
+ if (err)
+ return -EFAULT;
+
+ set_thread_flag(TIF_SVE);
+
+fpsimd_only:
+ /* copy the FP and status/control registers */
+ /* restore_sigframe() already checked that user->fpsimd != NULL. */
+ err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
+ sizeof(fpsimd.vregs));
+ __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
+ __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+
+ /* load the hardware registers from the fpsimd_state structure */
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
+
+ return err ? -EFAULT : 0;
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_sve_context(void __user *ctx);
+extern int restore_sve_fpsimd_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
@@ -237,6 +346,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
char const __user *const sfp = (char const __user *)sf;
user->fpsimd = NULL;
+ user->sve = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -287,6 +397,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case SVE_MAGIC:
+ if (!system_supports_sve())
+ goto invalid;
+
+ if (user->sve)
+ goto invalid;
+
+ if (size < sizeof(*user->sve))
+ goto invalid;
+
+ user->sve = (struct sve_context __user *)head;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -343,6 +466,10 @@ static int parse_user_sigframe(struct user_ctxs *user,
*/
offset = 0;
limit = extra_size;
+
+ if (!access_ok(VERIFY_READ, base, limit))
+ goto invalid;
+
continue;
default:
@@ -359,9 +486,6 @@ static int parse_user_sigframe(struct user_ctxs *user,
}
done:
- if (!user->fpsimd)
- goto invalid;
-
return 0;
invalid:
@@ -395,8 +519,19 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0)
err = parse_user_sigframe(&user, sf);
- if (err == 0)
- err = restore_fpsimd_context(user.fpsimd);
+ if (err == 0) {
+ if (!user.fpsimd)
+ return -EINVAL;
+
+ if (user.sve) {
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ err = restore_sve_fpsimd_context(&user);
+ } else {
+ err = restore_fpsimd_context(user.fpsimd);
+ }
+ }
return err;
}
@@ -455,6 +590,18 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return err;
}
+ if (system_supports_sve()) {
+ unsigned int vq = 0;
+
+ if (test_thread_flag(TIF_SVE))
+ vq = sve_vq_from_vl(current->thread.sve_vl);
+
+ err = sigframe_alloc(user, &user->sve_offset,
+ SVE_SIG_CONTEXT_SIZE(vq));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -496,6 +643,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ /* Scalable Vector Extension state, if present */
+ if (system_supports_sve() && err == 0 && user->sve_offset) {
+ struct sve_context __user *sve_ctx =
+ apply_user_offset(user, user->sve_offset);
+ err |= preserve_sve_context(sve_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
@@ -595,6 +749,8 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct rt_sigframe __user *frame;
int err = 0;
+ fpsimd_signal_preserve_current_state();
+
if (get_sigframe(&user, ksig, regs))
return 1;
@@ -756,9 +912,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
addr_limit_user_check();
if (thread_flags & _TIF_NEED_RESCHED) {
+ /* Unmask Debug and SError for the next task */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
schedule();
} else {
- local_irq_enable();
+ local_daif_restore(DAIF_PROCCTX);
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -775,7 +934,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
fpsimd_restore_current_state();
}
- local_irq_disable();
+ local_daif_mask();
thread_flags = READ_ONCE(current_thread_info()->flags);
} while (thread_flags & _TIF_WORK_MASK);
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e09bf5d15606..22711ee8e36c 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -239,7 +239,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_preserve_current_state();
+ fpsimd_signal_preserve_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9f7195a5773e..551eb07c53b6 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
+#include <asm/daifflags.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
@@ -216,6 +217,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
*/
asmlinkage void secondary_start_kernel(void)
{
+ u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
unsigned int cpu;
@@ -265,14 +267,14 @@ asmlinkage void secondary_start_kernel(void)
* the CPU migration code to notice that the CPU is online
* before we continue.
*/
- pr_info("CPU%u: Booted secondary processor [%08x]\n",
- cpu, read_cpuid_id());
+ pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
+ cpu, (unsigned long)mpidr,
+ read_cpuid_id());
update_cpu_boot_status(CPU_BOOT_SUCCESS);
set_cpu_online(cpu, true);
complete(&cpu_running);
- local_irq_enable();
- local_async_enable();
+ local_daif_restore(DAIF_PROCCTX);
/*
* OK, it's off to the idle thread for us
@@ -368,10 +370,6 @@ void __cpu_die(unsigned int cpu)
/*
* Called from the idle thread for the CPU which has been shutdown.
*
- * Note that we disable IRQs here, but do not re-enable them
- * before returning to the caller. This is also the behaviour
- * of the other hotplug-cpu capable cores, so presumably coming
- * out of idle fixes this.
*/
void cpu_die(void)
{
@@ -379,7 +377,7 @@ void cpu_die(void)
idle_task_exit();
- local_irq_disable();
+ local_daif_mask();
/* Tell __cpu_die() that this CPU is now safe to dispose of */
(void)cpu_report_death();
@@ -837,7 +835,7 @@ static void ipi_cpu_stop(unsigned int cpu)
{
set_cpu_online(cpu, false);
- local_irq_disable();
+ local_daif_mask();
while (1)
cpu_relax();
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 77cd655e6eb7..3fe5ad884418 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -5,6 +5,7 @@
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
#include <asm/pgtable.h>
@@ -12,7 +13,6 @@
#include <asm/mmu_context.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/tlbflush.h>
/*
* This is allocated by cpu_suspend_init(), and used to store a pointer to
@@ -58,7 +58,7 @@ void notrace __cpu_suspend_exit(void)
/*
* Restore HW breakpoint registers to sane values
* before debug exceptions are possibly reenabled
- * through local_dbg_restore.
+ * by cpu_suspend()s local_daif_restore() call.
*/
if (hw_breakpoint_restore)
hw_breakpoint_restore(cpu);
@@ -82,7 +82,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* updates to mdscr register (saved and restored along with
* general purpose registers) from kernel debuggers.
*/
- local_dbg_save(flags);
+ flags = local_daif_save();
/*
* Function graph tracer state gets incosistent when the kernel
@@ -115,7 +115,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* restored, so from this point onwards, debugging is fully
* renabled if it was enabled when core started shutdown.
*/
- local_dbg_restore(flags);
+ local_daif_restore(flags);
return ret;
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8383af15a759..3d3588fcd1c7 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/atomic.h>
#include <asm/bug.h>
+#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/insn.h>
@@ -58,55 +59,9 @@ static const char *handler[]= {
int show_unhandled_signals = 1;
-/*
- * Dump out the contents of some kernel memory nicely...
- */
-static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
- unsigned long top)
-{
- unsigned long first;
- mm_segment_t fs;
- int i;
-
- /*
- * We need to switch to kernel mode so that we can use __get_user
- * to safely read from kernel space.
- */
- fs = get_fs();
- set_fs(KERNEL_DS);
-
- printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top);
-
- for (first = bottom & ~31; first < top; first += 32) {
- unsigned long p;
- char str[sizeof(" 12345678") * 8 + 1];
-
- memset(str, ' ', sizeof(str));
- str[sizeof(str) - 1] = '\0';
-
- for (p = first, i = 0; i < (32 / 8)
- && p < top; i++, p += 8) {
- if (p >= bottom && p < top) {
- unsigned long val;
-
- if (__get_user(val, (unsigned long *)p) == 0)
- sprintf(str + i * 17, " %016lx", val);
- else
- sprintf(str + i * 17, " ????????????????");
- }
- }
- printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
- }
-
- set_fs(fs);
-}
-
static void dump_backtrace_entry(unsigned long where)
{
- /*
- * Note that 'where' can have a physical address, but it's not handled.
- */
- print_ip_sym(where);
+ printk(" %pS\n", (void *)where);
}
static void __dump_instr(const char *lvl, struct pt_regs *regs)
@@ -171,10 +126,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
skip = !!regs;
printk("Call trace:\n");
- while (1) {
- unsigned long stack;
- int ret;
-
+ do {
/* skip until specified stack frame */
if (!skip) {
dump_backtrace_entry(frame.pc);
@@ -189,17 +141,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
*/
dump_backtrace_entry(regs->pc);
}
- ret = unwind_frame(tsk, &frame);
- if (ret < 0)
- break;
- if (in_entry_text(frame.pc)) {
- stack = frame.fp - offsetof(struct pt_regs, stackframe);
-
- if (on_accessible_stack(tsk, stack))
- dump_mem("", "Exception stack", stack,
- stack + sizeof(struct pt_regs));
- }
- }
+ } while (!unwind_frame(tsk, &frame));
put_task_stack(tsk);
}
@@ -293,6 +235,17 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
}
}
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
+{
+ regs->pc += size;
+
+ /*
+ * If we were single stepping, we want to get the step exception after
+ * we return from the trap.
+ */
+ user_fastforward_single_step(current);
+}
+
static LIST_HEAD(undef_hook);
static DEFINE_RAW_SPINLOCK(undef_lock);
@@ -358,8 +311,8 @@ exit:
return fn ? fn(regs, instr) : 1;
}
-static void force_signal_inject(int signal, int code, struct pt_regs *regs,
- unsigned long address)
+void force_signal_inject(int signal, int code, struct pt_regs *regs,
+ unsigned long address)
{
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
@@ -373,7 +326,7 @@ static void force_signal_inject(int signal, int code, struct pt_regs *regs,
desc = "illegal memory access";
break;
default:
- desc = "bad mode";
+ desc = "unknown or unrecoverable error";
break;
}
@@ -480,7 +433,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
if (ret)
arm64_notify_segfault(regs, address);
else
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -490,7 +443,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
pt_regs_write_reg(regs, rt, val);
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -498,7 +451,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
pt_regs_write_reg(regs, rt, arch_counter_get_cntvct());
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -506,7 +459,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- regs->pc += 4;
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
struct sys64_hook {
@@ -603,6 +556,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_HVC64] = "HVC (AArch64)",
[ESR_ELx_EC_SMC64] = "SMC (AArch64)",
[ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)",
+ [ESR_ELx_EC_SVE] = "SVE",
[ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF",
[ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)",
[ESR_ELx_EC_IABT_CUR] = "IABT (current EL)",
@@ -642,7 +596,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
esr_get_class_string(esr));
die("Oops - bad mode", regs, 0);
- local_irq_disable();
+ local_daif_mask();
panic("bad mode");
}
@@ -708,6 +662,19 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
}
#endif
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+ nmi_enter();
+
+ console_verbose();
+
+ pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+ smp_processor_id(), esr, esr_get_class_string(esr));
+ __show_regs(regs);
+
+ panic("Asynchronous SError Interrupt");
+}
+
void __pte_error(const char *file, int line, unsigned long val)
{
pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
@@ -761,7 +728,7 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
}
/* If thread survives, skip over the BUG instruction and continue: */
- regs->pc += AARCH64_INSN_SIZE; /* skip BRK and resume */
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 76320e920965..c39872a7b03c 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -309,7 +309,7 @@ ENTRY(__kernel_clock_getres)
b.ne 4f
ldr x2, 6f
2:
- cbz w1, 3f
+ cbz x1, 3f
stp xzr, x2, [x1]
3: /* res == NULL. */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7debb74843a0..b71247995469 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -147,6 +147,13 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* Until SVE is supported for guests: */
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
@@ -160,6 +167,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_HVC64] = handle_hvc,
[ESR_ELx_EC_SMC64] = handle_smc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
+ [ESR_ELx_EC_SVE] = handle_sve,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index f5154ed3da6c..321c9c05dd9e 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -65,16 +65,6 @@
default: write_debug(ptr[0], reg, 0); \
}
-#define PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
-
-#define PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
-#define PMBLIMITR_EL1_E BIT(0)
-
-#define PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
-#define PMBIDR_EL1_P BIT(4)
-
-#define psb_csync() asm volatile("hint #17")
-
static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
{
/* The vcpu can run. but it can't hide. */
@@ -90,18 +80,18 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
return;
/* Yes; is it owned by EL3? */
- reg = read_sysreg_s(PMBIDR_EL1);
- if (reg & PMBIDR_EL1_P)
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
return;
/* No; is the host actually using the thing? */
- reg = read_sysreg_s(PMBLIMITR_EL1);
- if (!(reg & PMBLIMITR_EL1_E))
+ reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
return;
/* Yes; save the control register and disable data generation */
- *pmscr_el1 = read_sysreg_s(PMSCR_EL1);
- write_sysreg_s(0, PMSCR_EL1);
+ *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+ write_sysreg_s(0, SYS_PMSCR_EL1);
isb();
/* Now drain all buffered data to memory */
@@ -122,7 +112,7 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
isb();
/* Re-enable data generation */
- write_sysreg_s(pmscr_el1, PMSCR_EL1);
+ write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
}
void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 945e79c641c4..951f3ebaff26 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -48,7 +48,7 @@ static void __hyp_text __activate_traps_vhe(void)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~CPACR_EL1_FPEN;
+ val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
write_sysreg(val, cpacr_el1);
write_sysreg(__kvm_hyp_vector, vbar_el1);
@@ -59,7 +59,7 @@ static void __hyp_text __activate_traps_nvhe(void)
u64 val;
val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
write_sysreg(val, cptr_el2);
}
@@ -81,11 +81,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* it will cause an exception.
*/
val = vcpu->arch.hcr_el2;
+
if (!(val & HCR_RW) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
+
+ if (val & HCR_RW) /* for AArch64 only: */
+ val |= HCR_TID3; /* TID3: trap feature register accesses */
+
write_sysreg(val, hcr_el2);
+
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
/*
@@ -111,7 +117,7 @@ static void __hyp_text __deactivate_traps_vhe(void)
write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
- write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
+ write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e070d3baf9f..a0ee9b05e3d4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -23,6 +23,7 @@
#include <linux/bsearch.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
+#include <linux/printk.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -892,6 +893,146 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
return true;
}
+/* Read a sanitised cpufeature ID register by sys_reg_desc */
+static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
+{
+ u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+ (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+ u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
+
+ if (id == SYS_ID_AA64PFR0_EL1) {
+ if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
+ pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n",
+ task_pid_nr(current));
+
+ val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+ }
+
+ return val;
+}
+
+/* cpufeature ID register access trap handlers */
+
+static bool __access_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r,
+ bool raz)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = read_id_reg(r, raz);
+ return true;
+}
+
+static bool access_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return __access_id_reg(vcpu, p, r, false);
+}
+
+static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ return __access_id_reg(vcpu, p, r, true);
+}
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+
+/*
+ * cpufeature ID register user accessors
+ *
+ * For now, these registers are immutable for userspace, so no values
+ * are stored, and for set_id_reg() we don't allow the effective value
+ * to be changed.
+ */
+static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+ bool raz)
+{
+ const u64 id = sys_reg_to_index(rd);
+ const u64 val = read_id_reg(rd, raz);
+
+ return reg_to_user(uaddr, &val, id);
+}
+
+static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+ bool raz)
+{
+ const u64 id = sys_reg_to_index(rd);
+ int err;
+ u64 val;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (val != read_id_reg(rd, raz))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __get_id_reg(rd, uaddr, false);
+}
+
+static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __set_id_reg(rd, uaddr, false);
+}
+
+static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __get_id_reg(rd, uaddr, true);
+}
+
+static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ return __set_id_reg(rd, uaddr, true);
+}
+
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define ID_SANITISED(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+}
+
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = access_raz_id_reg, \
+ .get_user = get_raz_id_reg, \
+ .set_user = set_raz_id_reg, \
+}
+
+/*
+ * sys_reg_desc initialiser for known ID registers that we hide from guests.
+ * For now, these are exposed just like unallocated ID regs: they appear
+ * RAZ for the guest.
+ */
+#define ID_HIDDEN(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_raz_id_reg, \
+ .get_user = get_raz_id_reg, \
+ .set_user = set_raz_id_reg, \
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -944,6 +1085,84 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
+
+ /*
+ * ID regs: all ID_SANITISED() entries here must have corresponding
+ * entries in arm64_ftr_regs[].
+ */
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+ ID_SANITISED(ID_PFR0_EL1),
+ ID_SANITISED(ID_PFR1_EL1),
+ ID_SANITISED(ID_DFR0_EL1),
+ ID_HIDDEN(ID_AFR0_EL1),
+ ID_SANITISED(ID_MMFR0_EL1),
+ ID_SANITISED(ID_MMFR1_EL1),
+ ID_SANITISED(ID_MMFR2_EL1),
+ ID_SANITISED(ID_MMFR3_EL1),
+
+ /* CRm=2 */
+ ID_SANITISED(ID_ISAR0_EL1),
+ ID_SANITISED(ID_ISAR1_EL1),
+ ID_SANITISED(ID_ISAR2_EL1),
+ ID_SANITISED(ID_ISAR3_EL1),
+ ID_SANITISED(ID_ISAR4_EL1),
+ ID_SANITISED(ID_ISAR5_EL1),
+ ID_SANITISED(ID_MMFR4_EL1),
+ ID_UNALLOCATED(2,7),
+
+ /* CRm=3 */
+ ID_SANITISED(MVFR0_EL1),
+ ID_SANITISED(MVFR1_EL1),
+ ID_SANITISED(MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
+ ID_UNALLOCATED(3,4),
+ ID_UNALLOCATED(3,5),
+ ID_UNALLOCATED(3,6),
+ ID_UNALLOCATED(3,7),
+
+ /* AArch64 ID registers */
+ /* CRm=4 */
+ ID_SANITISED(ID_AA64PFR0_EL1),
+ ID_SANITISED(ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
+ ID_UNALLOCATED(4,4),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
+
+ /* CRm=5 */
+ ID_SANITISED(ID_AA64DFR0_EL1),
+ ID_SANITISED(ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
+ ID_HIDDEN(ID_AA64AFR0_EL1),
+ ID_HIDDEN(ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
+
+ /* CRm=6 */
+ ID_SANITISED(ID_AA64ISAR0_EL1),
+ ID_SANITISED(ID_AA64ISAR1_EL1),
+ ID_UNALLOCATED(6,2),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
+
+ /* CRm=7 */
+ ID_SANITISED(ID_AA64MMFR0_EL1),
+ ID_SANITISED(ID_AA64MMFR1_EL1),
+ ID_SANITISED(ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
+
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@ -1790,8 +2009,8 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if (!r)
r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
- /* Not saved in the sys_reg array? */
- if (r && !r->reg)
+ /* Not saved in the sys_reg array and not otherwise accessible? */
+ if (r && !(r->reg || r->get_user))
r = NULL;
return r;
@@ -1815,20 +2034,6 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(ctr_el0)
FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(id_pfr0_el1)
-FUNCTION_INVARIANT(id_pfr1_el1)
-FUNCTION_INVARIANT(id_dfr0_el1)
-FUNCTION_INVARIANT(id_afr0_el1)
-FUNCTION_INVARIANT(id_mmfr0_el1)
-FUNCTION_INVARIANT(id_mmfr1_el1)
-FUNCTION_INVARIANT(id_mmfr2_el1)
-FUNCTION_INVARIANT(id_mmfr3_el1)
-FUNCTION_INVARIANT(id_isar0_el1)
-FUNCTION_INVARIANT(id_isar1_el1)
-FUNCTION_INVARIANT(id_isar2_el1)
-FUNCTION_INVARIANT(id_isar3_el1)
-FUNCTION_INVARIANT(id_isar4_el1)
-FUNCTION_INVARIANT(id_isar5_el1)
FUNCTION_INVARIANT(clidr_el1)
FUNCTION_INVARIANT(aidr_el1)
@@ -1836,20 +2041,6 @@ FUNCTION_INVARIANT(aidr_el1)
static struct sys_reg_desc invariant_sys_regs[] = {
{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
- { SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 },
- { SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 },
- { SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 },
- { SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 },
- { SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 },
- { SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 },
- { SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 },
- { SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 },
- { SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 },
- { SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 },
- { SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 },
- { SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 },
- { SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 },
- { SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 },
{ SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
@@ -2079,12 +2270,31 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
return true;
}
+static int walk_one_sys_reg(const struct sys_reg_desc *rd,
+ u64 __user **uind,
+ unsigned int *total)
+{
+ /*
+ * Ignore registers we trap but don't save,
+ * and for which no custom user accessor is provided.
+ */
+ if (!(rd->reg || rd->get_user))
+ return 0;
+
+ if (!copy_reg_to_user(rd, uind))
+ return -EFAULT;
+
+ (*total)++;
+ return 0;
+}
+
/* Assumed ordered tables, see kvm_sys_reg_table_init. */
static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
{
const struct sys_reg_desc *i1, *i2, *end1, *end2;
unsigned int total = 0;
size_t num;
+ int err;
/* We check for duplicates here, to allow arch-specific overrides. */
i1 = get_target_table(vcpu->arch.target, true, &num);
@@ -2098,21 +2308,13 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
while (i1 || i2) {
int cmp = cmp_sys_reg(i1, i2);
/* target-specific overrides generic entry. */
- if (cmp <= 0) {
- /* Ignore registers we trap but don't save. */
- if (i1->reg) {
- if (!copy_reg_to_user(i1, &uind))
- return -EFAULT;
- total++;
- }
- } else {
- /* Ignore registers we trap but don't save. */
- if (i2->reg) {
- if (!copy_reg_to_user(i2, &uind))
- return -EFAULT;
- total++;
- }
- }
+ if (cmp <= 0)
+ err = walk_one_sys_reg(i1, &uind, &total);
+ else
+ err = walk_one_sys_reg(i2, &uind, &total);
+
+ if (err)
+ return err;
if (cmp <= 0 && ++i1 == end1)
i1 = NULL;
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 9a8cb96555d6..4e696f96451f 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -3,7 +3,7 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_in_user.o copy_page.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
- strchr.o strrchr.o
+ strchr.o strrchr.o tishift.o
# Tell the compiler to treat all general purpose registers (with the
# exception of the IP registers, which are already handled by the caller
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index dad4ec9bbfd1..e48ac402e7be 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -24,10 +24,28 @@
#include <linux/module.h>
#include <linux/timex.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define USECS_TO_CYCLES(time_usecs) \
+ xloops_to_cycles((time_usecs) * 0x10C7UL)
+
+static inline unsigned long xloops_to_cycles(unsigned long xloops)
+{
+ return (xloops * loops_per_jiffy * HZ) >> 32;
+}
+
void __delay(unsigned long cycles)
{
cycles_t start = get_cycles();
+ if (arch_timer_evtstrm_available()) {
+ const cycles_t timer_evt_period =
+ USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
+
+ while ((get_cycles() - start + timer_evt_period) < cycles)
+ wfe();
+ }
+
while ((get_cycles() - start) < cycles)
cpu_relax();
}
@@ -35,10 +53,7 @@ EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
- unsigned long loops;
-
- loops = xloops * loops_per_jiffy * HZ;
- __delay(loops >> 32);
+ __delay(xloops_to_cycles(xloops));
}
EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
new file mode 100644
index 000000000000..0179a43cc045
--- /dev/null
+++ b/arch/arm64/lib/tishift.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(__ashlti3)
+ cbz x2, 1f
+ mov x3, #64
+ sub x3, x3, x2
+ cmp x3, #0
+ b.le 2f
+ lsl x1, x1, x2
+ lsr x3, x0, x3
+ lsl x2, x0, x2
+ orr x1, x1, x3
+ mov x0, x2
+1:
+ ret
+2:
+ neg w1, w3
+ mov x2, #0
+ lsl x1, x0, x1
+ mov x0, x2
+ ret
+ENDPROC(__ashlti3)
+
+ENTRY(__ashrti3)
+ cbz x2, 3f
+ mov x3, #64
+ sub x3, x3, x2
+ cmp x3, #0
+ b.le 4f
+ lsr x0, x0, x2
+ lsl x3, x1, x3
+ asr x2, x1, x2
+ orr x0, x0, x3
+ mov x1, x2
+3:
+ ret
+4:
+ neg w0, w3
+ asr x2, x1, #63
+ asr x0, x1, x0
+ mov x1, x2
+ ret
+ENDPROC(__ashrti3)
+
+ENTRY(__lshrti3)
+ cbz x2, 1f
+ mov x3, #64
+ sub x3, x3, x2
+ cmp x3, #0
+ b.le 2f
+ lsr x0, x0, x2
+ lsl x3, x1, x3
+ lsr x2, x1, x2
+ orr x0, x0, x3
+ mov x1, x2
+1:
+ ret
+2:
+ neg w0, w3
+ mov x2, #0
+ lsr x0, x1, x0
+ mov x1, x2
+ ret
+ENDPROC(__lshrti3)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 614af886b7ef..b45c5bcaeccb 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -166,7 +166,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
/* create a coherent mapping */
page = virt_to_page(ptr);
coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- prot, NULL);
+ prot, __builtin_return_address(0));
if (!coherent_ptr)
goto no_map;
@@ -303,8 +303,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
unsigned long pfn, size_t size)
{
int ret = -ENXIO;
- unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
- PAGE_SHIFT;
+ unsigned long nr_vma_pages = vma_pages(vma);
unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b64958b23a7f..22168cd0dde7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -105,13 +105,11 @@ static void data_abort_decode(unsigned int esr)
(esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
}
-/*
- * Decode mem abort information
- */
static void mem_abort_decode(unsigned int esr)
{
pr_alert("Mem abort info:\n");
+ pr_alert(" ESR = 0x%08x\n", esr);
pr_alert(" Exception class = %s, IL = %u bits\n",
esr_get_class_string(esr),
(esr & ESR_ELx_IL) ? 32 : 16);
@@ -249,9 +247,6 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
return false;
}
-/*
- * The kernel tried to access some page that wasn't present.
- */
static void __do_kernel_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -264,9 +259,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
- /*
- * No handler, we'll have to terminate things with extreme prejudice.
- */
bust_spinlocks(1);
if (is_permission_fault(esr, regs, addr)) {
@@ -291,10 +283,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
do_exit(SIGKILL);
}
-/*
- * Something tried to access memory that isn't in our memory map. User mode
- * accesses just cause a SIGSEGV
- */
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
struct pt_regs *regs, int fault)
@@ -559,23 +547,6 @@ no_context:
return 0;
}
-/*
- * First Level Translation Fault Handler
- *
- * We enter here because the first level page table doesn't contain a valid
- * entry for the address.
- *
- * If the address is in kernel space (>= TASK_SIZE), then we are probably
- * faulting in the vmalloc() area.
- *
- * If the init_task's first level page tables contains the relevant entry, we
- * copy the it to this task. If not, we send the process a signal, fixup the
- * exception, or oops the kernel.
- *
- * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
- * or a critical region, and should only copy the information from the master
- * page table, nothing more.
- */
static int __kprobes do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
@@ -594,18 +565,11 @@ static int do_alignment_fault(unsigned long addr, unsigned int esr,
return 0;
}
-/*
- * This abort handler always returns "fault".
- */
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
- return 1;
+ return 1; /* "fault" */
}
-/*
- * This abort handler deals with Synchronous External Abort.
- * It calls notifiers, and then returns "fault".
- */
static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
@@ -668,14 +632,14 @@ static const struct fault_info fault_info[] = {
{ do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
{ do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
- { do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
+ { do_sea, SIGBUS, 0, "synchronous parity or ECC error" }, // Reserved when RAS is implemented
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
- { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
- { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
- { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
+ { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
+ { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
+ { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" }, // Reserved when RAS is implemented
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
@@ -693,7 +657,7 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
{ do_bad, SIGBUS, 0, "TLB conflict abort" },
- { do_bad, SIGBUS, 0, "unknown 49" },
+ { do_bad, SIGBUS, 0, "Unsupported atomic hardware update fault" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
{ do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" },
@@ -710,13 +674,6 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "unknown 63" },
};
-/*
- * Handle Synchronous External Aborts that occur in a guest kernel.
- *
- * The return value will be zero if the SEA was successfully handled
- * and non-zero if there was an error processing the error or there was
- * no error to process.
- */
int handle_guest_sea(phys_addr_t addr, unsigned int esr)
{
int ret = -ENOENT;
@@ -727,9 +684,6 @@ int handle_guest_sea(phys_addr_t addr, unsigned int esr)
return ret;
}
-/*
- * Dispatch a data abort to the relevant handler.
- */
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -739,11 +693,14 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
if (!inf->fn(addr, esr, regs))
return;
- pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
- inf->name, esr, addr);
+ pr_alert("Unhandled fault: %s at 0x%016lx\n",
+ inf->name, addr);
mem_abort_decode(esr);
+ if (!user_mode(regs))
+ show_pte(addr);
+
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -751,9 +708,6 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die("", regs, &info, esr);
}
-/*
- * Handle stack alignment exceptions.
- */
asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42fb0df6..95233dfc4c39 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -109,10 +109,10 @@ ENTRY(cpu_do_resume)
/*
* __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking
* debug exceptions. By restoring MDSCR_EL1 here, we may take a debug
- * exception. Mask them until local_dbg_restore() in cpu_suspend()
+ * exception. Mask them until local_daif_restore() in cpu_suspend()
* resets them.
*/
- disable_dbg
+ disable_daif
msr mdscr_el1, x10
msr sctlr_el1, x12
@@ -155,8 +155,7 @@ ENDPROC(cpu_do_switch_mm)
* called by anything else. It can only be executed from a TTBR0 mapping.
*/
ENTRY(idmap_cpu_replace_ttbr1)
- mrs x2, daif
- msr daifset, #0xf
+ save_and_disable_daif flags=x2
adrp x1, empty_zero_page
msr ttbr1_el1, x1
@@ -169,7 +168,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
msr ttbr1_el1, x0
isb
- msr daif, x2
+ restore_daif x2
ret
ENDPROC(idmap_cpu_replace_ttbr1)
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index 597a737d538f..92f9edf9d11e 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -199,7 +199,7 @@ static int __init gtdt_parse_timer_block(struct acpi_gtdt_timer_block *block,
struct acpi_gtdt_timer_entry *gtdt_frame;
if (!block->timer_count) {
- pr_err(FW_BUG "GT block present, but frame count is zero.");
+ pr_err(FW_BUG "GT block present, but frame count is zero.\n");
return -ENODEV;
}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index de56394dd161..95255ecfae7c 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -88,8 +88,8 @@ static inline int iort_set_fwnode(struct acpi_iort_node *iort_node,
*
* Returns: fwnode_handle pointer on success, NULL on failure
*/
-static inline
-struct fwnode_handle *iort_get_fwnode(struct acpi_iort_node *node)
+static inline struct fwnode_handle *iort_get_fwnode(
+ struct acpi_iort_node *node)
{
struct iort_fwnode *curr;
struct fwnode_handle *fwnode = NULL;
@@ -126,6 +126,31 @@ static inline void iort_delete_fwnode(struct acpi_iort_node *node)
spin_unlock(&iort_fwnode_lock);
}
+/**
+ * iort_get_iort_node() - Retrieve iort_node associated with an fwnode
+ *
+ * @fwnode: fwnode associated with device to be looked-up
+ *
+ * Returns: iort_node pointer on success, NULL on failure
+ */
+static inline struct acpi_iort_node *iort_get_iort_node(
+ struct fwnode_handle *fwnode)
+{
+ struct iort_fwnode *curr;
+ struct acpi_iort_node *iort_node = NULL;
+
+ spin_lock(&iort_fwnode_lock);
+ list_for_each_entry(curr, &iort_fwnode_list, list) {
+ if (curr->fwnode == fwnode) {
+ iort_node = curr->iort_node;
+ break;
+ }
+ }
+ spin_unlock(&iort_fwnode_lock);
+
+ return iort_node;
+}
+
typedef acpi_status (*iort_find_node_callback)
(struct acpi_iort_node *node, void *context);
@@ -306,9 +331,8 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
return 0;
}
-static
-struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
- u32 *id_out, int index)
+static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
+ u32 *id_out, int index)
{
struct acpi_iort_node *parent;
struct acpi_iort_id_mapping *map;
@@ -332,7 +356,8 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
- node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+ node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX ||
+ node->type == ACPI_IORT_NODE_SMMU_V3) {
*id_out = map->output_base;
return parent;
}
@@ -341,6 +366,47 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
return NULL;
}
+#if (ACPI_CA_VERSION > 0x20170929)
+static int iort_get_id_mapping_index(struct acpi_iort_node *node)
+{
+ struct acpi_iort_smmu_v3 *smmu;
+
+ switch (node->type) {
+ case ACPI_IORT_NODE_SMMU_V3:
+ /*
+ * SMMUv3 dev ID mapping index was introduced in revision 1
+ * table, not available in revision 0
+ */
+ if (node->revision < 1)
+ return -EINVAL;
+
+ smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+ /*
+ * ID mapping index is only ignored if all interrupts are
+ * GSIV based
+ */
+ if (smmu->event_gsiv && smmu->pri_gsiv && smmu->gerr_gsiv
+ && smmu->sync_gsiv)
+ return -EINVAL;
+
+ if (smmu->id_mapping_index >= node->mapping_count) {
+ pr_err(FW_BUG "[node %p type %d] ID mapping index overflows valid mappings\n",
+ node, node->type);
+ return -EINVAL;
+ }
+
+ return smmu->id_mapping_index;
+ default:
+ return -EINVAL;
+ }
+}
+#else
+static inline int iort_get_id_mapping_index(struct acpi_iort_node *node)
+{
+ return -EINVAL;
+}
+#endif
+
static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
u32 id_in, u32 *id_out,
u8 type_mask)
@@ -350,7 +416,7 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
/* Parse the ID mapping tree to find specified node type */
while (node) {
struct acpi_iort_id_mapping *map;
- int i;
+ int i, index;
if (IORT_TYPE_MASK(node->type) & type_mask) {
if (id_out)
@@ -371,8 +437,19 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
goto fail_map;
}
+ /*
+ * Get the special ID mapping index (if any) and skip its
+ * associated ID map to prevent erroneous multi-stage
+ * IORT ID translations.
+ */
+ index = iort_get_id_mapping_index(node);
+
/* Do the ID translation */
for (i = 0; i < node->mapping_count; i++, map++) {
+ /* if it is special mapping index, skip it */
+ if (i == index)
+ continue;
+
if (!iort_id_map(map, node->type, id, &id))
break;
}
@@ -392,10 +469,9 @@ fail_map:
return NULL;
}
-static
-struct acpi_iort_node *iort_node_map_platform_id(struct acpi_iort_node *node,
- u32 *id_out, u8 type_mask,
- int index)
+static struct acpi_iort_node *iort_node_map_platform_id(
+ struct acpi_iort_node *node, u32 *id_out, u8 type_mask,
+ int index)
{
struct acpi_iort_node *parent;
u32 id;
@@ -424,9 +500,25 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
{
struct pci_bus *pbus;
- if (!dev_is_pci(dev))
+ if (!dev_is_pci(dev)) {
+ struct acpi_iort_node *node;
+ /*
+ * scan iort_fwnode_list to see if it's an iort platform
+ * device (such as SMMU, PMCG),its iort node already cached
+ * and associated with fwnode when iort platform devices
+ * were initialized.
+ */
+ node = iort_get_iort_node(dev->fwnode);
+ if (node)
+ return node;
+
+ /*
+ * if not, then it should be a platform device defined in
+ * DSDT/SSDT (with Named Component node in IORT)
+ */
return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
iort_match_node_callback, dev);
+ }
/* Find a PCI root bus */
pbus = to_pci_dev(dev)->bus;
@@ -466,16 +558,24 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
*/
int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
{
- int i;
+ int i, index;
struct acpi_iort_node *node;
node = iort_find_dev_node(dev);
if (!node)
return -ENODEV;
- for (i = 0; i < node->mapping_count; i++) {
- if (iort_node_map_platform_id(node, dev_id, IORT_MSI_TYPE, i))
+ index = iort_get_id_mapping_index(node);
+ /* if there is a valid index, go get the dev_id directly */
+ if (index >= 0) {
+ if (iort_node_get_id(node, dev_id, index))
return 0;
+ } else {
+ for (i = 0; i < node->mapping_count; i++) {
+ if (iort_node_map_platform_id(node, dev_id,
+ IORT_MSI_TYPE, i))
+ return 0;
+ }
}
return -ENODEV;
@@ -538,6 +638,49 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
}
+static void iort_set_device_domain(struct device *dev,
+ struct acpi_iort_node *node)
+{
+ struct acpi_iort_its_group *its;
+ struct acpi_iort_node *msi_parent;
+ struct acpi_iort_id_mapping *map;
+ struct fwnode_handle *iort_fwnode;
+ struct irq_domain *domain;
+ int index;
+
+ index = iort_get_id_mapping_index(node);
+ if (index < 0)
+ return;
+
+ map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+ node->mapping_offset + index * sizeof(*map));
+
+ /* Firmware bug! */
+ if (!map->output_reference ||
+ !(map->flags & ACPI_IORT_ID_SINGLE_MAPPING)) {
+ pr_err(FW_BUG "[node %p type %d] Invalid MSI mapping\n",
+ node, node->type);
+ return;
+ }
+
+ msi_parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+ map->output_reference);
+
+ if (!msi_parent || msi_parent->type != ACPI_IORT_NODE_ITS_GROUP)
+ return;
+
+ /* Move to ITS specific data */
+ its = (struct acpi_iort_its_group *)msi_parent->node_data;
+
+ iort_fwnode = iort_find_domain_token(its->identifiers[0]);
+ if (!iort_fwnode)
+ return;
+
+ domain = irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI);
+ if (domain)
+ dev_set_msi_domain(dev, domain);
+}
+
/**
* iort_get_platform_device_domain() - Find MSI domain related to a
* platform device
@@ -623,14 +766,14 @@ static inline bool iort_iommu_driver_enabled(u8 type)
}
#ifdef CONFIG_IOMMU_API
-static inline
-const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(
+ struct iommu_fwspec *fwspec)
{
return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
}
-static inline
-int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+static inline int iort_add_device_replay(const struct iommu_ops *ops,
+ struct device *dev)
{
int err = 0;
@@ -640,11 +783,11 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
return err;
}
#else
-static inline
-const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(
+ struct iommu_fwspec *fwspec)
{ return NULL; }
-static inline
-int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+static inline int iort_add_device_replay(const struct iommu_ops *ops,
+ struct device *dev)
{ return 0; }
#endif
@@ -968,7 +1111,7 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
}
-#if defined(CONFIG_ACPI_NUMA) && defined(ACPI_IORT_SMMU_V3_PXM_VALID)
+#if defined(CONFIG_ACPI_NUMA)
/*
* set numa proximity domain for smmuv3 device
*/
@@ -1051,34 +1194,34 @@ static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK;
}
-struct iort_iommu_config {
+struct iort_dev_config {
const char *name;
- int (*iommu_init)(struct acpi_iort_node *node);
- bool (*iommu_is_coherent)(struct acpi_iort_node *node);
- int (*iommu_count_resources)(struct acpi_iort_node *node);
- void (*iommu_init_resources)(struct resource *res,
+ int (*dev_init)(struct acpi_iort_node *node);
+ bool (*dev_is_coherent)(struct acpi_iort_node *node);
+ int (*dev_count_resources)(struct acpi_iort_node *node);
+ void (*dev_init_resources)(struct resource *res,
struct acpi_iort_node *node);
- void (*iommu_set_proximity)(struct device *dev,
+ void (*dev_set_proximity)(struct device *dev,
struct acpi_iort_node *node);
};
-static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
+static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = {
.name = "arm-smmu-v3",
- .iommu_is_coherent = arm_smmu_v3_is_coherent,
- .iommu_count_resources = arm_smmu_v3_count_resources,
- .iommu_init_resources = arm_smmu_v3_init_resources,
- .iommu_set_proximity = arm_smmu_v3_set_proximity,
+ .dev_is_coherent = arm_smmu_v3_is_coherent,
+ .dev_count_resources = arm_smmu_v3_count_resources,
+ .dev_init_resources = arm_smmu_v3_init_resources,
+ .dev_set_proximity = arm_smmu_v3_set_proximity,
};
-static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
+static const struct iort_dev_config iort_arm_smmu_cfg __initconst = {
.name = "arm-smmu",
- .iommu_is_coherent = arm_smmu_is_coherent,
- .iommu_count_resources = arm_smmu_count_resources,
- .iommu_init_resources = arm_smmu_init_resources
+ .dev_is_coherent = arm_smmu_is_coherent,
+ .dev_count_resources = arm_smmu_count_resources,
+ .dev_init_resources = arm_smmu_init_resources
};
-static __init
-const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
+static __init const struct iort_dev_config *iort_get_dev_cfg(
+ struct acpi_iort_node *node)
{
switch (node->type) {
case ACPI_IORT_NODE_SMMU_V3:
@@ -1091,31 +1234,28 @@ const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
}
/**
- * iort_add_smmu_platform_device() - Allocate a platform device for SMMU
- * @node: Pointer to SMMU ACPI IORT node
+ * iort_add_platform_device() - Allocate a platform device for IORT node
+ * @node: Pointer to device ACPI IORT node
*
* Returns: 0 on success, <0 failure
*/
-static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
+static int __init iort_add_platform_device(struct acpi_iort_node *node,
+ const struct iort_dev_config *ops)
{
struct fwnode_handle *fwnode;
struct platform_device *pdev;
struct resource *r;
enum dev_dma_attr attr;
int ret, count;
- const struct iort_iommu_config *ops = iort_get_iommu_cfg(node);
-
- if (!ops)
- return -ENODEV;
pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
if (!pdev)
return -ENOMEM;
- if (ops->iommu_set_proximity)
- ops->iommu_set_proximity(&pdev->dev, node);
+ if (ops->dev_set_proximity)
+ ops->dev_set_proximity(&pdev->dev, node);
- count = ops->iommu_count_resources(node);
+ count = ops->dev_count_resources(node);
r = kcalloc(count, sizeof(*r), GFP_KERNEL);
if (!r) {
@@ -1123,7 +1263,7 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
goto dev_put;
}
- ops->iommu_init_resources(r, node);
+ ops->dev_init_resources(r, node);
ret = platform_device_add_resources(pdev, r, count);
/*
@@ -1158,12 +1298,14 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
pdev->dev.fwnode = fwnode;
- attr = ops->iommu_is_coherent(node) ?
- DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
+ attr = ops->dev_is_coherent && ops->dev_is_coherent(node) ?
+ DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
/* Configure DMA for the page table walker */
acpi_dma_configure(&pdev->dev, attr);
+ iort_set_device_domain(&pdev->dev, node);
+
ret = platform_device_add(pdev);
if (ret)
goto dma_deconfigure;
@@ -1216,6 +1358,7 @@ static void __init iort_init_platform_devices(void)
struct fwnode_handle *fwnode;
int i, ret;
bool acs_enabled = false;
+ const struct iort_dev_config *ops;
/*
* iort_table and iort both point to the start of IORT table, but
@@ -1238,16 +1381,15 @@ static void __init iort_init_platform_devices(void)
if (!acs_enabled)
acs_enabled = iort_enable_acs(iort_node);
- if ((iort_node->type == ACPI_IORT_NODE_SMMU) ||
- (iort_node->type == ACPI_IORT_NODE_SMMU_V3)) {
-
+ ops = iort_get_dev_cfg(iort_node);
+ if (ops) {
fwnode = acpi_alloc_fwnode_static();
if (!fwnode)
return;
iort_set_fwnode(iort_node, fwnode);
- ret = iort_add_smmu_platform_device(iort_node);
+ ret = iort_add_platform_device(iort_node, ops);
if (ret) {
iort_delete_fwnode(iort_node);
acpi_free_fwnode_static(fwnode);
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index e8c6946fed9d..3063f5312397 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -1276,6 +1276,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
+ .module = THIS_MODULE,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 0ecf5beb56ec..538bfa8ba9b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -77,6 +77,7 @@ static bool arch_timer_mem_use_virtual;
static bool arch_counter_suspend_stop;
static bool vdso_default = true;
+static cpumask_t evtstrm_available = CPU_MASK_NONE;
static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
static int __init early_evtstrm_cfg(char *buf)
@@ -739,6 +740,7 @@ static void arch_timer_evtstrm_enable(int divider)
#ifdef CONFIG_COMPAT
compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
#endif
+ cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
static void arch_timer_configure_evtstream(void)
@@ -863,6 +865,16 @@ u32 arch_timer_get_rate(void)
return arch_timer_rate;
}
+bool arch_timer_evtstrm_available(void)
+{
+ /*
+ * We might get called from a preemptible context. This is fine
+ * because availability of the event stream should be always the same
+ * for a preemptible context and context where we might resume a task.
+ */
+ return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
+}
+
static u64 arch_counter_get_cntvct_mem(void)
{
u32 vct_lo, vct_hi, tmp_hi;
@@ -928,6 +940,8 @@ static int arch_timer_dying_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+
arch_timer_stop(clk);
return 0;
}
@@ -937,10 +951,16 @@ static DEFINE_PER_CPU(unsigned long, saved_cntkctl);
static int arch_timer_cpu_pm_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- if (action == CPU_PM_ENTER)
+ if (action == CPU_PM_ENTER) {
__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
- else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
+
+ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+ } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
+
+ if (elf_hwcap & HWCAP_EVTSTRM)
+ cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
+ }
return NOTIFY_OK;
}
@@ -1016,7 +1036,6 @@ static int __init arch_timer_register(void)
if (err)
goto out_unreg_notify;
-
/* Register and immediately configure the timer on the boot CPU */
err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
"clockevents/arm/arch_timer:starting",
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e5197ffb7422..b8f44b068fc6 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,13 @@ config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
+config HISI_PMU
+ bool "HiSilicon SoC PMU"
+ depends on ARM64 && ACPI
+ help
+ Support for HiSilicon SoC uncore performance monitoring
+ unit (PMU), such as: L3C, HHA and DDRC.
+
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
@@ -43,4 +50,12 @@ config XGENE_PMU
help
Say y if you want to use APM X-Gene SoC performance monitors.
+config ARM_SPE_PMU
+ tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
+ depends on PERF_EVENTS && ARM64
+ help
+ Enable perf support for the ARMv8.2 Statistical Profiling
+ Extension, which provides periodic sampling of operations in
+ the CPU pipeline and reports this via the perf AUX interface.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 9402dc8ff22c..710a0135bd61 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d14fc2e67f93..7bc5eee96b31 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -539,7 +539,7 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
return;
- if (irq_is_percpu(irq)) {
+ if (irq_is_percpu_devid(irq)) {
free_percpu_irq(irq, &hw_events->percpu_pmu);
cpumask_clear(&armpmu->active_irqs);
return;
@@ -565,10 +565,10 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
if (!irq)
return 0;
- if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
+ if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) {
err = request_percpu_irq(irq, handler, "arm-pmu",
&hw_events->percpu_pmu);
- } else if (irq_is_percpu(irq)) {
+ } else if (irq_is_percpu_devid(irq)) {
int other_cpu = cpumask_first(&armpmu->active_irqs);
int other_irq = per_cpu(hw_events->irq, other_cpu);
@@ -649,7 +649,7 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
irq = armpmu_get_cpu_irq(pmu, cpu);
if (irq) {
- if (irq_is_percpu(irq)) {
+ if (irq_is_percpu_devid(irq)) {
enable_percpu_irq(irq, IRQ_TYPE_NONE);
return 0;
}
@@ -667,7 +667,7 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
irq = armpmu_get_cpu_irq(pmu, cpu);
- if (irq && irq_is_percpu(irq))
+ if (irq && irq_is_percpu_devid(irq))
disable_percpu_irq(irq);
return 0;
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 3303dd8d8eb5..705f1a390e31 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -193,9 +193,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
int pmu_idx = 0;
int cpu, ret;
- if (acpi_disabled)
- return 0;
-
/*
* Initialise and register the set of PMUs which we know about right
* now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 4428852e1da1..91b224eced18 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -127,7 +127,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
if (num_irqs == 1) {
int irq = platform_get_irq(pdev, 0);
- if (irq && irq_is_percpu(irq))
+ if (irq && irq_is_percpu_devid(irq))
return pmu_parse_percpu_irq(pmu, irq);
}
@@ -150,7 +150,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
if (WARN_ON(irq <= 0))
continue;
- if (irq_is_percpu(irq)) {
+ if (irq_is_percpu_devid(irq)) {
pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
return -EINVAL;
}
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
new file mode 100644
index 000000000000..8ce262fc2561
--- /dev/null
+++ b/drivers/perf/arm_spe_pmu.c
@@ -0,0 +1,1249 @@
+/*
+ * Perf support for the Statistical Profiling Extension, introduced as
+ * part of ARMv8.2.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#define PMUNAME "arm_spe"
+#define DRVNAME PMUNAME "_pmu"
+#define pr_fmt(fmt) DRVNAME ": " fmt
+
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/sysreg.h>
+
+#define ARM_SPE_BUF_PAD_BYTE 0
+
+struct arm_spe_pmu_buf {
+ int nr_pages;
+ bool snapshot;
+ void *base;
+};
+
+struct arm_spe_pmu {
+ struct pmu pmu;
+ struct platform_device *pdev;
+ cpumask_t supported_cpus;
+ struct hlist_node hotplug_node;
+
+ int irq; /* PPI */
+
+ u16 min_period;
+ u16 counter_sz;
+
+#define SPE_PMU_FEAT_FILT_EVT (1UL << 0)
+#define SPE_PMU_FEAT_FILT_TYP (1UL << 1)
+#define SPE_PMU_FEAT_FILT_LAT (1UL << 2)
+#define SPE_PMU_FEAT_ARCH_INST (1UL << 3)
+#define SPE_PMU_FEAT_LDS (1UL << 4)
+#define SPE_PMU_FEAT_ERND (1UL << 5)
+#define SPE_PMU_FEAT_DEV_PROBED (1UL << 63)
+ u64 features;
+
+ u16 max_record_sz;
+ u16 align;
+ struct perf_output_handle __percpu *handle;
+};
+
+#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
+
+/* Convert a free-running index from perf into an SPE buffer offset */
+#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/* Keep track of our dynamic hotplug state */
+static enum cpuhp_state arm_spe_pmu_online;
+
+enum arm_spe_pmu_buf_fault_action {
+ SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
+ SPE_PMU_BUF_FAULT_ACT_FATAL,
+ SPE_PMU_BUF_FAULT_ACT_OK,
+};
+
+/* This sysfs gunk was really good fun to write. */
+enum arm_spe_pmu_capabilities {
+ SPE_PMU_CAP_ARCH_INST = 0,
+ SPE_PMU_CAP_ERND,
+ SPE_PMU_CAP_FEAT_MAX,
+ SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
+ SPE_PMU_CAP_MIN_IVAL,
+};
+
+static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
+ [SPE_PMU_CAP_ARCH_INST] = SPE_PMU_FEAT_ARCH_INST,
+ [SPE_PMU_CAP_ERND] = SPE_PMU_FEAT_ERND,
+};
+
+static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+{
+ if (cap < SPE_PMU_CAP_FEAT_MAX)
+ return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);
+
+ switch (cap) {
+ case SPE_PMU_CAP_CNT_SZ:
+ return spe_pmu->counter_sz;
+ case SPE_PMU_CAP_MIN_IVAL:
+ return spe_pmu->min_period;
+ default:
+ WARN(1, "unknown cap %d\n", cap);
+ }
+
+ return 0;
+}
+
+static ssize_t arm_spe_pmu_cap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+#define SPE_EXT_ATTR_ENTRY(_name, _func, _var) \
+ &((struct dev_ext_attribute[]) { \
+ { __ATTR(_name, S_IRUGO, _func, NULL), (void *)_var } \
+ })[0].attr.attr
+
+#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var) \
+ SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+
+static struct attribute *arm_spe_pmu_cap_attr[] = {
+ SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
+ SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
+ SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
+ SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+ NULL,
+};
+
+static struct attribute_group arm_spe_pmu_cap_group = {
+ .name = "caps",
+ .attrs = arm_spe_pmu_cap_attr,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_ts_enable_CFG config /* PMSCR_EL1.TS */
+#define ATTR_CFG_FLD_ts_enable_LO 0
+#define ATTR_CFG_FLD_ts_enable_HI 0
+#define ATTR_CFG_FLD_pa_enable_CFG config /* PMSCR_EL1.PA */
+#define ATTR_CFG_FLD_pa_enable_LO 1
+#define ATTR_CFG_FLD_pa_enable_HI 1
+#define ATTR_CFG_FLD_pct_enable_CFG config /* PMSCR_EL1.PCT */
+#define ATTR_CFG_FLD_pct_enable_LO 2
+#define ATTR_CFG_FLD_pct_enable_HI 2
+#define ATTR_CFG_FLD_jitter_CFG config /* PMSIRR_EL1.RND */
+#define ATTR_CFG_FLD_jitter_LO 16
+#define ATTR_CFG_FLD_jitter_HI 16
+#define ATTR_CFG_FLD_branch_filter_CFG config /* PMSFCR_EL1.B */
+#define ATTR_CFG_FLD_branch_filter_LO 32
+#define ATTR_CFG_FLD_branch_filter_HI 32
+#define ATTR_CFG_FLD_load_filter_CFG config /* PMSFCR_EL1.LD */
+#define ATTR_CFG_FLD_load_filter_LO 33
+#define ATTR_CFG_FLD_load_filter_HI 33
+#define ATTR_CFG_FLD_store_filter_CFG config /* PMSFCR_EL1.ST */
+#define ATTR_CFG_FLD_store_filter_LO 34
+#define ATTR_CFG_FLD_store_filter_HI 34
+
+#define ATTR_CFG_FLD_event_filter_CFG config1 /* PMSEVFR_EL1 */
+#define ATTR_CFG_FLD_event_filter_LO 0
+#define ATTR_CFG_FLD_event_filter_HI 63
+
+#define ATTR_CFG_FLD_min_latency_CFG config2 /* PMSLATFR_EL1.MINLAT */
+#define ATTR_CFG_FLD_min_latency_LO 0
+#define ATTR_CFG_FLD_min_latency_HI 11
+
+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name) \
+ PMU_FORMAT_ATTR(name, \
+ _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
+ ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name) \
+ _ATTR_CFG_GET_FLD(attr, \
+ ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI)
+
+GEN_PMU_FORMAT_ATTR(ts_enable);
+GEN_PMU_FORMAT_ATTR(pa_enable);
+GEN_PMU_FORMAT_ATTR(pct_enable);
+GEN_PMU_FORMAT_ATTR(jitter);
+GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(event_filter);
+GEN_PMU_FORMAT_ATTR(min_latency);
+
+static struct attribute *arm_spe_pmu_formats_attr[] = {
+ &format_attr_ts_enable.attr,
+ &format_attr_pa_enable.attr,
+ &format_attr_pct_enable.attr,
+ &format_attr_jitter.attr,
+ &format_attr_branch_filter.attr,
+ &format_attr_load_filter.attr,
+ &format_attr_store_filter.attr,
+ &format_attr_event_filter.attr,
+ &format_attr_min_latency.attr,
+ NULL,
+};
+
+static struct attribute_group arm_spe_pmu_format_group = {
+ .name = "format",
+ .attrs = arm_spe_pmu_formats_attr,
+};
+
+static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+
+ return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *arm_spe_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group arm_spe_pmu_group = {
+ .attrs = arm_spe_pmu_attrs,
+};
+
+static const struct attribute_group *arm_spe_pmu_attr_groups[] = {
+ &arm_spe_pmu_group,
+ &arm_spe_pmu_cap_group,
+ &arm_spe_pmu_format_group,
+ NULL,
+};
+
+/* Convert between user ABI and register values */
+static u64 arm_spe_event_to_pmscr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 reg = 0;
+
+ reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT;
+ reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT;
+ reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT;
+
+ if (!attr->exclude_user)
+ reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT);
+
+ if (!attr->exclude_kernel)
+ reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
+
+ if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && capable(CAP_SYS_ADMIN))
+ reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
+
+ return reg;
+}
+
+static void arm_spe_event_sanitise_period(struct perf_event *event)
+{
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+ u64 period = event->hw.sample_period;
+ u64 max_period = SYS_PMSIRR_EL1_INTERVAL_MASK
+ << SYS_PMSIRR_EL1_INTERVAL_SHIFT;
+
+ if (period < spe_pmu->min_period)
+ period = spe_pmu->min_period;
+ else if (period > max_period)
+ period = max_period;
+ else
+ period &= max_period;
+
+ event->hw.sample_period = period;
+}
+
+static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 reg = 0;
+
+ arm_spe_event_sanitise_period(event);
+
+ reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT;
+ reg |= event->hw.sample_period;
+
+ return reg;
+}
+
+static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 reg = 0;
+
+ reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT;
+ reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT;
+ reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT;
+
+ if (reg)
+ reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT);
+
+ if (ATTR_CFG_GET_FLD(attr, event_filter))
+ reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT);
+
+ if (ATTR_CFG_GET_FLD(attr, min_latency))
+ reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT);
+
+ return reg;
+}
+
+static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ return ATTR_CFG_GET_FLD(attr, event_filter);
+}
+
+static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ return ATTR_CFG_GET_FLD(attr, min_latency)
+ << SYS_PMSLATFR_EL1_MINLAT_SHIFT;
+}
+
+static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
+{
+ struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+ u64 head = PERF_IDX2OFF(handle->head, buf);
+
+ memset(buf->base + head, ARM_SPE_BUF_PAD_BYTE, len);
+ if (!buf->snapshot)
+ perf_aux_output_skip(handle, len);
+}
+
+static u64 arm_spe_pmu_next_snapshot_off(struct perf_output_handle *handle)
+{
+ struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+ u64 head = PERF_IDX2OFF(handle->head, buf);
+ u64 limit = buf->nr_pages * PAGE_SIZE;
+
+ /*
+ * The trace format isn't parseable in reverse, so clamp
+ * the limit to half of the buffer size in snapshot mode
+ * so that the worst case is half a buffer of records, as
+ * opposed to a single record.
+ */
+ if (head < limit >> 1)
+ limit >>= 1;
+
+ /*
+ * If we're within max_record_sz of the limit, we must
+ * pad, move the head index and recompute the limit.
+ */
+ if (limit - head < spe_pmu->max_record_sz) {
+ arm_spe_pmu_pad_buf(handle, limit - head);
+ handle->head = PERF_IDX2OFF(limit, buf);
+ limit = ((buf->nr_pages * PAGE_SIZE) >> 1) + handle->head;
+ }
+
+ return limit;
+}
+
+static u64 __arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+ struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+ const u64 bufsize = buf->nr_pages * PAGE_SIZE;
+ u64 limit = bufsize;
+ u64 head, tail, wakeup;
+
+ /*
+ * The head can be misaligned for two reasons:
+ *
+ * 1. The hardware left PMBPTR pointing to the first byte after
+ * a record when generating a buffer management event.
+ *
+ * 2. We used perf_aux_output_skip to consume handle->size bytes
+ * and CIRC_SPACE was used to compute the size, which always
+ * leaves one entry free.
+ *
+ * Deal with this by padding to the next alignment boundary and
+ * moving the head index. If we run out of buffer space, we'll
+ * reduce handle->size to zero and end up reporting truncation.
+ */
+ head = PERF_IDX2OFF(handle->head, buf);
+ if (!IS_ALIGNED(head, spe_pmu->align)) {
+ unsigned long delta = roundup(head, spe_pmu->align) - head;
+
+ delta = min(delta, handle->size);
+ arm_spe_pmu_pad_buf(handle, delta);
+ head = PERF_IDX2OFF(handle->head, buf);
+ }
+
+ /* If we've run out of free space, then nothing more to do */
+ if (!handle->size)
+ goto no_space;
+
+ /* Compute the tail and wakeup indices now that we've aligned head */
+ tail = PERF_IDX2OFF(handle->head + handle->size, buf);
+ wakeup = PERF_IDX2OFF(handle->wakeup, buf);
+
+ /*
+ * Avoid clobbering unconsumed data. We know we have space, so
+ * if we see head == tail we know that the buffer is empty. If
+ * head > tail, then there's nothing to clobber prior to
+ * wrapping.
+ */
+ if (head < tail)
+ limit = round_down(tail, PAGE_SIZE);
+
+ /*
+ * Wakeup may be arbitrarily far into the future. If it's not in
+ * the current generation, either we'll wrap before hitting it,
+ * or it's in the past and has been handled already.
+ *
+ * If there's a wakeup before we wrap, arrange to be woken up by
+ * the page boundary following it. Keep the tail boundary if
+ * that's lower.
+ */
+ if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
+ limit = min(limit, round_up(wakeup, PAGE_SIZE));
+
+ if (limit > head)
+ return limit;
+
+ arm_spe_pmu_pad_buf(handle, handle->size);
+no_space:
+ perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+ perf_aux_output_end(handle, 0);
+ return 0;
+}
+
+static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+ struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+ u64 limit = __arm_spe_pmu_next_off(handle);
+ u64 head = PERF_IDX2OFF(handle->head, buf);
+
+ /*
+ * If the head has come too close to the end of the buffer,
+ * then pad to the end and recompute the limit.
+ */
+ if (limit && (limit - head < spe_pmu->max_record_sz)) {
+ arm_spe_pmu_pad_buf(handle, limit - head);
+ limit = __arm_spe_pmu_next_off(handle);
+ }
+
+ return limit;
+}
+
+static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event)
+{
+ u64 base, limit;
+ struct arm_spe_pmu_buf *buf;
+
+ /* Start a new aux session */
+ buf = perf_aux_output_begin(handle, event);
+ if (!buf) {
+ event->hw.state |= PERF_HES_STOPPED;
+ /*
+ * We still need to clear the limit pointer, since the
+ * profiler might only be disabled by virtue of a fault.
+ */
+ limit = 0;
+ goto out_write_limit;
+ }
+
+ limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
+ : arm_spe_pmu_next_off(handle);
+ if (limit)
+ limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT);
+
+ limit += (u64)buf->base;
+ base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
+ write_sysreg_s(base, SYS_PMBPTR_EL1);
+
+out_write_limit:
+ write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
+}
+
+static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
+{
+ struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+ u64 offset, size;
+
+ offset = read_sysreg_s(SYS_PMBPTR_EL1) - (u64)buf->base;
+ size = offset - PERF_IDX2OFF(handle->head, buf);
+
+ if (buf->snapshot)
+ handle->head = offset;
+
+ perf_aux_output_end(handle, size);
+}
+
+static void arm_spe_pmu_disable_and_drain_local(void)
+{
+ /* Disable profiling at EL0 and EL1 */
+ write_sysreg_s(0, SYS_PMSCR_EL1);
+ isb();
+
+ /* Drain any buffered data */
+ psb_csync();
+ dsb(nsh);
+
+ /* Disable the profiling buffer */
+ write_sysreg_s(0, SYS_PMBLIMITR_EL1);
+ isb();
+}
+
+/* IRQ handling */
+static enum arm_spe_pmu_buf_fault_action
+arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
+{
+ const char *err_str;
+ u64 pmbsr;
+ enum arm_spe_pmu_buf_fault_action ret;
+
+ /*
+ * Ensure new profiling data is visible to the CPU and any external
+ * aborts have been resolved.
+ */
+ psb_csync();
+ dsb(nsh);
+
+ /* Ensure hardware updates to PMBPTR_EL1 are visible */
+ isb();
+
+ /* Service required? */
+ pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
+ if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)))
+ return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;
+
+ /*
+ * If we've lost data, disable profiling and also set the PARTIAL
+ * flag to indicate that the last record is corrupted.
+ */
+ if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT))
+ perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
+ PERF_AUX_FLAG_PARTIAL);
+
+ /* Report collisions to userspace so that it can up the period */
+ if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT))
+ perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
+
+ /* We only expect buffer management events */
+ switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) {
+ case SYS_PMBSR_EL1_EC_BUF:
+ /* Handled below */
+ break;
+ case SYS_PMBSR_EL1_EC_FAULT_S1:
+ case SYS_PMBSR_EL1_EC_FAULT_S2:
+ err_str = "Unexpected buffer fault";
+ goto out_err;
+ default:
+ err_str = "Unknown error code";
+ goto out_err;
+ }
+
+ /* Buffer management event */
+ switch (pmbsr &
+ (SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) {
+ case SYS_PMBSR_EL1_BUF_BSC_FULL:
+ ret = SPE_PMU_BUF_FAULT_ACT_OK;
+ goto out_stop;
+ default:
+ err_str = "Unknown buffer status code";
+ }
+
+out_err:
+ pr_err_ratelimited("%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n",
+ err_str, smp_processor_id(), pmbsr,
+ read_sysreg_s(SYS_PMBPTR_EL1),
+ read_sysreg_s(SYS_PMBLIMITR_EL1));
+ ret = SPE_PMU_BUF_FAULT_ACT_FATAL;
+
+out_stop:
+ arm_spe_perf_aux_output_end(handle);
+ return ret;
+}
+
+static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
+{
+ struct perf_output_handle *handle = dev;
+ struct perf_event *event = handle->event;
+ enum arm_spe_pmu_buf_fault_action act;
+
+ if (!perf_get_aux(handle))
+ return IRQ_NONE;
+
+ act = arm_spe_pmu_buf_get_fault_act(handle);
+ if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+ return IRQ_NONE;
+
+ /*
+ * Ensure perf callbacks have completed, which may disable the
+ * profiling buffer in response to a TRUNCATION flag.
+ */
+ irq_work_run();
+
+ switch (act) {
+ case SPE_PMU_BUF_FAULT_ACT_FATAL:
+ /*
+ * If a fatal exception occurred then leaving the profiling
+ * buffer enabled is a recipe waiting to happen. Since
+ * fatal faults don't always imply truncation, make sure
+ * that the profiling buffer is disabled explicitly before
+ * clearing the syndrome register.
+ */
+ arm_spe_pmu_disable_and_drain_local();
+ break;
+ case SPE_PMU_BUF_FAULT_ACT_OK:
+ /*
+ * We handled the fault (the buffer was full), so resume
+ * profiling as long as we didn't detect truncation.
+ * PMBPTR might be misaligned, but we'll burn that bridge
+ * when we get to it.
+ */
+ if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
+ arm_spe_perf_aux_output_begin(handle, event);
+ isb();
+ }
+ break;
+ case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
+ /* We've seen you before, but GCC has the memory of a sieve. */
+ break;
+ }
+
+ /* The buffer pointers are now sane, so resume profiling. */
+ write_sysreg_s(0, SYS_PMBSR_EL1);
+ return IRQ_HANDLED;
+}
+
+/* Perf callbacks */
+static int arm_spe_pmu_event_init(struct perf_event *event)
+{
+ u64 reg;
+ struct perf_event_attr *attr = &event->attr;
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+
+ /* This is, of course, deeply driver-specific */
+ if (attr->type != event->pmu->type)
+ return -ENOENT;
+
+ if (event->cpu >= 0 &&
+ !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
+ return -ENOENT;
+
+ if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
+ return -EOPNOTSUPP;
+
+ if (attr->exclude_idle)
+ return -EOPNOTSUPP;
+
+ /*
+ * Feedback-directed frequency throttling doesn't work when we
+ * have a buffer of samples. We'd need to manually count the
+ * samples in the buffer when it fills up and adjust the event
+ * count to reflect that. Instead, just force the user to specify
+ * a sample period.
+ */
+ if (attr->freq)
+ return -EINVAL;
+
+ reg = arm_spe_event_to_pmsfcr(event);
+ if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
+ return -EOPNOTSUPP;
+
+ if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
+ return -EOPNOTSUPP;
+
+ if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) &&
+ !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
+ return -EOPNOTSUPP;
+
+ reg = arm_spe_event_to_pmscr(event);
+ if (!capable(CAP_SYS_ADMIN) &&
+ (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
+ BIT(SYS_PMSCR_EL1_CX_SHIFT) |
+ BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
+ return -EACCES;
+
+ return 0;
+}
+
+static void arm_spe_pmu_start(struct perf_event *event, int flags)
+{
+ u64 reg;
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+ hwc->state = 0;
+ arm_spe_perf_aux_output_begin(handle, event);
+ if (hwc->state)
+ return;
+
+ reg = arm_spe_event_to_pmsfcr(event);
+ write_sysreg_s(reg, SYS_PMSFCR_EL1);
+
+ reg = arm_spe_event_to_pmsevfr(event);
+ write_sysreg_s(reg, SYS_PMSEVFR_EL1);
+
+ reg = arm_spe_event_to_pmslatfr(event);
+ write_sysreg_s(reg, SYS_PMSLATFR_EL1);
+
+ if (flags & PERF_EF_RELOAD) {
+ reg = arm_spe_event_to_pmsirr(event);
+ write_sysreg_s(reg, SYS_PMSIRR_EL1);
+ isb();
+ reg = local64_read(&hwc->period_left);
+ write_sysreg_s(reg, SYS_PMSICR_EL1);
+ }
+
+ reg = arm_spe_event_to_pmscr(event);
+ isb();
+ write_sysreg_s(reg, SYS_PMSCR_EL1);
+}
+
+static void arm_spe_pmu_stop(struct perf_event *event, int flags)
+{
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+ /* If we're already stopped, then nothing to do */
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ /* Stop all trace generation */
+ arm_spe_pmu_disable_and_drain_local();
+
+ if (flags & PERF_EF_UPDATE) {
+ /*
+ * If there's a fault pending then ensure we contain it
+ * to this buffer, since we might be on the context-switch
+ * path.
+ */
+ if (perf_get_aux(handle)) {
+ enum arm_spe_pmu_buf_fault_action act;
+
+ act = arm_spe_pmu_buf_get_fault_act(handle);
+ if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+ arm_spe_perf_aux_output_end(handle);
+ else
+ write_sysreg_s(0, SYS_PMBSR_EL1);
+ }
+
+ /*
+ * This may also contain ECOUNT, but nobody else should
+ * be looking at period_left, since we forbid frequency
+ * based sampling.
+ */
+ local64_set(&hwc->period_left, read_sysreg_s(SYS_PMSICR_EL1));
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static int arm_spe_pmu_add(struct perf_event *event, int flags)
+{
+ int ret = 0;
+ struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int cpu = event->cpu == -1 ? smp_processor_id() : event->cpu;
+
+ if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+ return -ENOENT;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START) {
+ arm_spe_pmu_start(event, PERF_EF_RELOAD);
+ if (hwc->state & PERF_HES_STOPPED)
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void arm_spe_pmu_del(struct perf_event *event, int flags)
+{
+ arm_spe_pmu_stop(event, PERF_EF_UPDATE);
+}
+
+static void arm_spe_pmu_read(struct perf_event *event)
+{
+}
+
+static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages,
+ bool snapshot)
+{
+ int i;
+ struct page **pglist;
+ struct arm_spe_pmu_buf *buf;
+
+ /* We need at least two pages for this to work. */
+ if (nr_pages < 2)
+ return NULL;
+
+ /*
+ * We require an even number of pages for snapshot mode, so that
+ * we can effectively treat the buffer as consisting of two equal
+ * parts and give userspace a fighting chance of getting some
+ * useful data out of it.
+ */
+ if (!nr_pages || (snapshot && (nr_pages & 1)))
+ return NULL;
+
+ if (cpu == -1)
+ cpu = raw_smp_processor_id();
+
+ buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
+ if (!buf)
+ return NULL;
+
+ pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
+ if (!pglist)
+ goto out_free_buf;
+
+ for (i = 0; i < nr_pages; ++i) {
+ struct page *page = virt_to_page(pages[i]);
+
+ if (PagePrivate(page)) {
+ pr_warn("unexpected high-order page for auxbuf!");
+ goto out_free_pglist;
+ }
+
+ pglist[i] = virt_to_page(pages[i]);
+ }
+
+ buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!buf->base)
+ goto out_free_pglist;
+
+ buf->nr_pages = nr_pages;
+ buf->snapshot = snapshot;
+
+ kfree(pglist);
+ return buf;
+
+out_free_pglist:
+ kfree(pglist);
+out_free_buf:
+ kfree(buf);
+ return NULL;
+}
+
+static void arm_spe_pmu_free_aux(void *aux)
+{
+ struct arm_spe_pmu_buf *buf = aux;
+
+ vunmap(buf->base);
+ kfree(buf);
+}
+
+/* Initialisation and teardown functions */
+static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
+{
+ static atomic_t pmu_idx = ATOMIC_INIT(-1);
+
+ int idx;
+ char *name;
+ struct device *dev = &spe_pmu->pdev->dev;
+
+ spe_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+ .attr_groups = arm_spe_pmu_attr_groups,
+ /*
+ * We hitch a ride on the software context here, so that
+ * we can support per-task profiling (which is not possible
+ * with the invalid context as it doesn't get sched callbacks).
+ * This requires that userspace either uses a dummy event for
+ * perf_event_open, since the aux buffer is not setup until
+ * a subsequent mmap, or creates the profiling event in a
+ * disabled state and explicitly PERF_EVENT_IOC_ENABLEs it
+ * once the buffer has been created.
+ */
+ .task_ctx_nr = perf_sw_context,
+ .event_init = arm_spe_pmu_event_init,
+ .add = arm_spe_pmu_add,
+ .del = arm_spe_pmu_del,
+ .start = arm_spe_pmu_start,
+ .stop = arm_spe_pmu_stop,
+ .read = arm_spe_pmu_read,
+ .setup_aux = arm_spe_pmu_setup_aux,
+ .free_aux = arm_spe_pmu_free_aux,
+ };
+
+ idx = atomic_inc_return(&pmu_idx);
+ name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
+ return perf_pmu_register(&spe_pmu->pmu, name, -1);
+}
+
+static void arm_spe_pmu_perf_destroy(struct arm_spe_pmu *spe_pmu)
+{
+ perf_pmu_unregister(&spe_pmu->pmu);
+}
+
+static void __arm_spe_pmu_dev_probe(void *info)
+{
+ int fld;
+ u64 reg;
+ struct arm_spe_pmu *spe_pmu = info;
+ struct device *dev = &spe_pmu->pdev->dev;
+
+ fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
+ ID_AA64DFR0_PMSVER_SHIFT);
+ if (!fld) {
+ dev_err(dev,
+ "unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
+ fld, smp_processor_id());
+ return;
+ }
+
+ /* Read PMBIDR first to determine whether or not we have access */
+ reg = read_sysreg_s(SYS_PMBIDR_EL1);
+ if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) {
+ dev_err(dev,
+ "profiling buffer owned by higher exception level\n");
+ return;
+ }
+
+ /* Minimum alignment. If it's out-of-range, then fail the probe */
+ fld = reg >> SYS_PMBIDR_EL1_ALIGN_SHIFT & SYS_PMBIDR_EL1_ALIGN_MASK;
+ spe_pmu->align = 1 << fld;
+ if (spe_pmu->align > SZ_2K) {
+ dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
+ fld, smp_processor_id());
+ return;
+ }
+
+ /* It's now safe to read PMSIDR and figure out what we've got */
+ reg = read_sysreg_s(SYS_PMSIDR_EL1);
+ if (reg & BIT(SYS_PMSIDR_EL1_FE_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT;
+
+ if (reg & BIT(SYS_PMSIDR_EL1_FT_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP;
+
+ if (reg & BIT(SYS_PMSIDR_EL1_FL_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT;
+
+ if (reg & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST;
+
+ if (reg & BIT(SYS_PMSIDR_EL1_LDS_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_LDS;
+
+ if (reg & BIT(SYS_PMSIDR_EL1_ERND_SHIFT))
+ spe_pmu->features |= SPE_PMU_FEAT_ERND;
+
+ /* This field has a spaced out encoding, so just use a look-up */
+ fld = reg >> SYS_PMSIDR_EL1_INTERVAL_SHIFT & SYS_PMSIDR_EL1_INTERVAL_MASK;
+ switch (fld) {
+ case 0:
+ spe_pmu->min_period = 256;
+ break;
+ case 2:
+ spe_pmu->min_period = 512;
+ break;
+ case 3:
+ spe_pmu->min_period = 768;
+ break;
+ case 4:
+ spe_pmu->min_period = 1024;
+ break;
+ case 5:
+ spe_pmu->min_period = 1536;
+ break;
+ case 6:
+ spe_pmu->min_period = 2048;
+ break;
+ case 7:
+ spe_pmu->min_period = 3072;
+ break;
+ default:
+ dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
+ fld);
+ /* Fallthrough */
+ case 8:
+ spe_pmu->min_period = 4096;
+ }
+
+ /* Maximum record size. If it's out-of-range, then fail the probe */
+ fld = reg >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT & SYS_PMSIDR_EL1_MAXSIZE_MASK;
+ spe_pmu->max_record_sz = 1 << fld;
+ if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) {
+ dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
+ fld, smp_processor_id());
+ return;
+ }
+
+ fld = reg >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT & SYS_PMSIDR_EL1_COUNTSIZE_MASK;
+ switch (fld) {
+ default:
+ dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
+ fld);
+ /* Fallthrough */
+ case 2:
+ spe_pmu->counter_sz = 12;
+ }
+
+ dev_info(dev,
+ "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
+ cpumask_pr_args(&spe_pmu->supported_cpus),
+ spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
+
+ spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
+ return;
+}
+
+static void __arm_spe_pmu_reset_local(void)
+{
+ /*
+ * This is probably overkill, as we have no idea where we're
+ * draining any buffered data to...
+ */
+ arm_spe_pmu_disable_and_drain_local();
+
+ /* Reset the buffer base pointer */
+ write_sysreg_s(0, SYS_PMBPTR_EL1);
+ isb();
+
+ /* Clear any pending management interrupts */
+ write_sysreg_s(0, SYS_PMBSR_EL1);
+ isb();
+}
+
+static void __arm_spe_pmu_setup_one(void *info)
+{
+ struct arm_spe_pmu *spe_pmu = info;
+
+ __arm_spe_pmu_reset_local();
+ enable_percpu_irq(spe_pmu->irq, IRQ_TYPE_NONE);
+}
+
+static void __arm_spe_pmu_stop_one(void *info)
+{
+ struct arm_spe_pmu *spe_pmu = info;
+
+ disable_percpu_irq(spe_pmu->irq);
+ __arm_spe_pmu_reset_local();
+}
+
+static int arm_spe_pmu_cpu_startup(unsigned int cpu, struct hlist_node *node)
+{
+ struct arm_spe_pmu *spe_pmu;
+
+ spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+ if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+ return 0;
+
+ __arm_spe_pmu_setup_one(spe_pmu);
+ return 0;
+}
+
+static int arm_spe_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+ struct arm_spe_pmu *spe_pmu;
+
+ spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+ if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+ return 0;
+
+ __arm_spe_pmu_stop_one(spe_pmu);
+ return 0;
+}
+
+static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
+{
+ int ret;
+ cpumask_t *mask = &spe_pmu->supported_cpus;
+
+ /* Make sure we probe the hardware on a relevant CPU */
+ ret = smp_call_function_any(mask, __arm_spe_pmu_dev_probe, spe_pmu, 1);
+ if (ret || !(spe_pmu->features & SPE_PMU_FEAT_DEV_PROBED))
+ return -ENXIO;
+
+ /* Request our PPIs (note that the IRQ is still disabled) */
+ ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
+ spe_pmu->handle);
+ if (ret)
+ return ret;
+
+ /*
+ * Register our hotplug notifier now so we don't miss any events.
+ * This will enable the IRQ for any supported CPUs that are already
+ * up.
+ */
+ ret = cpuhp_state_add_instance(arm_spe_pmu_online,
+ &spe_pmu->hotplug_node);
+ if (ret)
+ free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+
+ return ret;
+}
+
+static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
+{
+ cpuhp_state_remove_instance(arm_spe_pmu_online, &spe_pmu->hotplug_node);
+ free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+}
+
+/* Driver and device probing */
+static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
+{
+ struct platform_device *pdev = spe_pmu->pdev;
+ int irq = platform_get_irq(pdev, 0);
+
+ if (irq < 0) {
+ dev_err(&pdev->dev, "failed to get IRQ (%d)\n", irq);
+ return -ENXIO;
+ }
+
+ if (!irq_is_percpu(irq)) {
+ dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq);
+ return -EINVAL;
+ }
+
+ if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) {
+ dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq);
+ return -EINVAL;
+ }
+
+ spe_pmu->irq = irq;
+ return 0;
+}
+
+static const struct of_device_id arm_spe_pmu_of_match[] = {
+ { .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
+ { /* Sentinel */ },
+};
+
+static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct arm_spe_pmu *spe_pmu;
+ struct device *dev = &pdev->dev;
+
+ spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
+ if (!spe_pmu) {
+ dev_err(dev, "failed to allocate spe_pmu\n");
+ return -ENOMEM;
+ }
+
+ spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
+ if (!spe_pmu->handle)
+ return -ENOMEM;
+
+ spe_pmu->pdev = pdev;
+ platform_set_drvdata(pdev, spe_pmu);
+
+ ret = arm_spe_pmu_irq_probe(spe_pmu);
+ if (ret)
+ goto out_free_handle;
+
+ ret = arm_spe_pmu_dev_init(spe_pmu);
+ if (ret)
+ goto out_free_handle;
+
+ ret = arm_spe_pmu_perf_init(spe_pmu);
+ if (ret)
+ goto out_teardown_dev;
+
+ return 0;
+
+out_teardown_dev:
+ arm_spe_pmu_dev_teardown(spe_pmu);
+out_free_handle:
+ free_percpu(spe_pmu->handle);
+ return ret;
+}
+
+static int arm_spe_pmu_device_remove(struct platform_device *pdev)
+{
+ struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+
+ arm_spe_pmu_perf_destroy(spe_pmu);
+ arm_spe_pmu_dev_teardown(spe_pmu);
+ free_percpu(spe_pmu->handle);
+ return 0;
+}
+
+static struct platform_driver arm_spe_pmu_driver = {
+ .driver = {
+ .name = DRVNAME,
+ .of_match_table = of_match_ptr(arm_spe_pmu_of_match),
+ },
+ .probe = arm_spe_pmu_device_dt_probe,
+ .remove = arm_spe_pmu_device_remove,
+};
+
+static int __init arm_spe_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
+ arm_spe_pmu_cpu_startup,
+ arm_spe_pmu_cpu_teardown);
+ if (ret < 0)
+ return ret;
+ arm_spe_pmu_online = ret;
+
+ ret = platform_driver_register(&arm_spe_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(arm_spe_pmu_online);
+
+ return ret;
+}
+
+static void __exit arm_spe_pmu_exit(void)
+{
+ platform_driver_unregister(&arm_spe_pmu_driver);
+ cpuhp_remove_multi_state(arm_spe_pmu_online);
+}
+
+module_init(arm_spe_pmu_init);
+module_exit(arm_spe_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for the ARMv8.2 Statistical Profiling Extension");
+MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
new file mode 100644
index 000000000000..2621d51ae87a
--- /dev/null
+++ b/drivers/perf/hisilicon/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
new file mode 100644
index 000000000000..1b10ea05a914
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -0,0 +1,463 @@
+/*
+ * HiSilicon SoC DDRC uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ * Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* DDRC register definition */
+#define DDRC_PERF_CTRL 0x010
+#define DDRC_FLUX_WR 0x380
+#define DDRC_FLUX_RD 0x384
+#define DDRC_FLUX_WCMD 0x388
+#define DDRC_FLUX_RCMD 0x38c
+#define DDRC_PRE_CMD 0x3c0
+#define DDRC_ACT_CMD 0x3c4
+#define DDRC_BNK_CHG 0x3c8
+#define DDRC_RNK_CHG 0x3cc
+#define DDRC_EVENT_CTRL 0x6C0
+#define DDRC_INT_MASK 0x6c8
+#define DDRC_INT_STATUS 0x6cc
+#define DDRC_INT_CLEAR 0x6d0
+
+/* DDRC has 8-counters */
+#define DDRC_NR_COUNTERS 0x8
+#define DDRC_PERF_CTRL_EN 0x2
+
+/*
+ * For DDRC PMU, there are eight-events and every event has been mapped
+ * to fixed-purpose counters which register offset is not consistent.
+ * Therefore there is no write event type and we assume that event
+ * code (0 to 7) is equal to counter index in PMU driver.
+ */
+#define GET_DDRC_EVENTID(hwc) (hwc->config_base & 0x7)
+
+static const u32 ddrc_reg_off[] = {
+ DDRC_FLUX_WR, DDRC_FLUX_RD, DDRC_FLUX_WCMD, DDRC_FLUX_RCMD,
+ DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_BNK_CHG, DDRC_RNK_CHG
+};
+
+/*
+ * Select the counter register offset using the counter index.
+ * In DDRC there are no programmable counter, the count
+ * is readed form the statistics counter register itself.
+ */
+static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx)
+{
+ return ddrc_reg_off[cntr_idx];
+}
+
+static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
+{
+ /* Use event code as counter index */
+ u32 idx = GET_DDRC_EVENTID(hwc);
+
+ if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) {
+ dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return 0;
+ }
+
+ return readl(ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx));
+}
+
+static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ u32 idx = GET_DDRC_EVENTID(hwc);
+
+ if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) {
+ dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return;
+ }
+
+ writel((u32)val,
+ ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx));
+}
+
+/*
+ * For DDRC PMU, event has been mapped to fixed-purpose counter by hardware,
+ * so there is no need to write event type.
+ */
+static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+ u32 type)
+{
+}
+
+static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu)
+{
+ u32 val;
+
+ /* Set perf_enable in DDRC_PERF_CTRL to start event counting */
+ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
+ val |= DDRC_PERF_CTRL_EN;
+ writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu)
+{
+ u32 val;
+
+ /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */
+ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
+ val &= ~DDRC_PERF_CTRL_EN;
+ writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Set counter index(event code) in DDRC_EVENT_CTRL register */
+ val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
+ val |= (1 << GET_DDRC_EVENTID(hwc));
+ writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Clear counter index(event code) in DDRC_EVENT_CTRL register */
+ val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
+ val &= ~(1 << GET_DDRC_EVENTID(hwc));
+ writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+}
+
+static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
+{
+ struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu);
+ unsigned long *used_mask = ddrc_pmu->pmu_events.used_mask;
+ struct hw_perf_event *hwc = &event->hw;
+ /* For DDRC PMU, we use event code as counter index */
+ int idx = GET_DDRC_EVENTID(hwc);
+
+ if (test_bit(idx, used_mask))
+ return -EAGAIN;
+
+ set_bit(idx, used_mask);
+
+ return idx;
+}
+
+static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 0 to enable interrupt */
+ val = readl(ddrc_pmu->base + DDRC_INT_MASK);
+ val &= ~(1 << GET_DDRC_EVENTID(hwc));
+ writel(val, ddrc_pmu->base + DDRC_INT_MASK);
+}
+
+static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 1 to mask interrupt */
+ val = readl(ddrc_pmu->base + DDRC_INT_MASK);
+ val |= (1 << GET_DDRC_EVENTID(hwc));
+ writel(val, ddrc_pmu->base + DDRC_INT_MASK);
+}
+
+static irqreturn_t hisi_ddrc_pmu_isr(int irq, void *dev_id)
+{
+ struct hisi_pmu *ddrc_pmu = dev_id;
+ struct perf_event *event;
+ unsigned long overflown;
+ int idx;
+
+ /* Read the DDRC_INT_STATUS register */
+ overflown = readl(ddrc_pmu->base + DDRC_INT_STATUS);
+ if (!overflown)
+ return IRQ_NONE;
+
+ /*
+ * Find the counter index which overflowed if the bit was set
+ * and handle it
+ */
+ for_each_set_bit(idx, &overflown, DDRC_NR_COUNTERS) {
+ /* Write 1 to clear the IRQ status flag */
+ writel((1 << idx), ddrc_pmu->base + DDRC_INT_CLEAR);
+
+ /* Get the corresponding event struct */
+ event = ddrc_pmu->pmu_events.hw_events[idx];
+ if (!event)
+ continue;
+
+ hisi_uncore_pmu_event_update(event);
+ hisi_uncore_pmu_set_event_period(event);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu,
+ struct platform_device *pdev)
+{
+ int irq, ret;
+
+ /* Read and init IRQ */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "DDRC PMU get irq fail; irq:%d\n", irq);
+ return irq;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_ddrc_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ dev_name(&pdev->dev), ddrc_pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "Fail to request IRQ:%d ret:%d\n", irq, ret);
+ return ret;
+ }
+
+ ddrc_pmu->irq = irq;
+
+ return 0;
+}
+
+static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
+ { "HISI0233", },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
+
+static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *ddrc_pmu)
+{
+ struct resource *res;
+
+ /*
+ * Use the SCCL_ID and DDRC channel ID to identify the
+ * DDRC PMU, while SCCL_ID is in MPIDR[aff2].
+ */
+ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id",
+ &ddrc_pmu->index_id)) {
+ dev_err(&pdev->dev, "Can not read ddrc channel-id!\n");
+ return -EINVAL;
+ }
+
+ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+ &ddrc_pmu->sccl_id)) {
+ dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n");
+ return -EINVAL;
+ }
+ /* DDRC PMUs only share the same SCCL */
+ ddrc_pmu->ccl_id = -1;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(ddrc_pmu->base)) {
+ dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n");
+ return PTR_ERR(ddrc_pmu->base);
+ }
+
+ return 0;
+}
+
+static struct attribute *hisi_ddrc_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-4"),
+ NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_ddrc_pmu_format_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(flux_wr, 0x00),
+ HISI_PMU_EVENT_ATTR(flux_rd, 0x01),
+ HISI_PMU_EVENT_ATTR(flux_wcmd, 0x02),
+ HISI_PMU_EVENT_ATTR(flux_rcmd, 0x03),
+ HISI_PMU_EVENT_ATTR(pre_cmd, 0x04),
+ HISI_PMU_EVENT_ATTR(act_cmd, 0x05),
+ HISI_PMU_EVENT_ATTR(rnk_chg, 0x06),
+ HISI_PMU_EVENT_ATTR(rw_chg, 0x07),
+ NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_ddrc_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
+ .attrs = hisi_ddrc_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = {
+ &hisi_ddrc_pmu_format_group,
+ &hisi_ddrc_pmu_events_group,
+ &hisi_ddrc_pmu_cpumask_attr_group,
+ NULL,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = {
+ .write_evtype = hisi_ddrc_pmu_write_evtype,
+ .get_event_idx = hisi_ddrc_pmu_get_event_idx,
+ .start_counters = hisi_ddrc_pmu_start_counters,
+ .stop_counters = hisi_ddrc_pmu_stop_counters,
+ .enable_counter = hisi_ddrc_pmu_enable_counter,
+ .disable_counter = hisi_ddrc_pmu_disable_counter,
+ .enable_counter_int = hisi_ddrc_pmu_enable_counter_int,
+ .disable_counter_int = hisi_ddrc_pmu_disable_counter_int,
+ .write_counter = hisi_ddrc_pmu_write_counter,
+ .read_counter = hisi_ddrc_pmu_read_counter,
+};
+
+static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
+ struct hisi_pmu *ddrc_pmu)
+{
+ int ret;
+
+ ret = hisi_ddrc_pmu_init_data(pdev, ddrc_pmu);
+ if (ret)
+ return ret;
+
+ ret = hisi_ddrc_pmu_init_irq(ddrc_pmu, pdev);
+ if (ret)
+ return ret;
+
+ ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
+ ddrc_pmu->counter_bits = 32;
+ ddrc_pmu->ops = &hisi_uncore_ddrc_ops;
+ ddrc_pmu->dev = &pdev->dev;
+ ddrc_pmu->on_cpu = -1;
+ ddrc_pmu->check_event = 7;
+
+ return 0;
+}
+
+static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *ddrc_pmu;
+ char *name;
+ int ret;
+
+ ddrc_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddrc_pmu), GFP_KERNEL);
+ if (!ddrc_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, ddrc_pmu);
+
+ ret = hisi_ddrc_pmu_dev_probe(pdev, ddrc_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ &ddrc_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret);
+ return ret;
+ }
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u",
+ ddrc_pmu->sccl_id, ddrc_pmu->index_id);
+ ddrc_pmu->pmu = (struct pmu) {
+ .name = name,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = hisi_uncore_pmu_event_init,
+ .pmu_enable = hisi_uncore_pmu_enable,
+ .pmu_disable = hisi_uncore_pmu_disable,
+ .add = hisi_uncore_pmu_add,
+ .del = hisi_uncore_pmu_del,
+ .start = hisi_uncore_pmu_start,
+ .stop = hisi_uncore_pmu_stop,
+ .read = hisi_uncore_pmu_read,
+ .attr_groups = hisi_ddrc_pmu_attr_groups,
+ };
+
+ ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
+ if (ret) {
+ dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n");
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ &ddrc_pmu->node);
+ }
+
+ return ret;
+}
+
+static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
+{
+ struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev);
+
+ perf_pmu_unregister(&ddrc_pmu->pmu);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ &ddrc_pmu->node);
+
+ return 0;
+}
+
+static struct platform_driver hisi_ddrc_pmu_driver = {
+ .driver = {
+ .name = "hisi_ddrc_pmu",
+ .acpi_match_table = ACPI_PTR(hisi_ddrc_pmu_acpi_match),
+ },
+ .probe = hisi_ddrc_pmu_probe,
+ .remove = hisi_ddrc_pmu_remove,
+};
+
+static int __init hisi_ddrc_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ "AP_PERF_ARM_HISI_DDRC_ONLINE",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret) {
+ pr_err("DDRC PMU: setup hotplug, ret = %d\n", ret);
+ return ret;
+ }
+
+ ret = platform_driver_register(&hisi_ddrc_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+
+ return ret;
+}
+module_init(hisi_ddrc_pmu_module_init);
+
+static void __exit hisi_ddrc_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_ddrc_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+
+}
+module_exit(hisi_ddrc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
new file mode 100644
index 000000000000..443906e0aff3
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -0,0 +1,473 @@
+/*
+ * HiSilicon SoC HHA uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ * Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* HHA register definition */
+#define HHA_INT_MASK 0x0804
+#define HHA_INT_STATUS 0x0808
+#define HHA_INT_CLEAR 0x080C
+#define HHA_PERF_CTRL 0x1E00
+#define HHA_EVENT_CTRL 0x1E04
+#define HHA_EVENT_TYPE0 0x1E80
+/*
+ * Each counter is 48-bits and [48:63] are reserved
+ * which are Read-As-Zero and Writes-Ignored.
+ */
+#define HHA_CNT0_LOWER 0x1F00
+
+/* HHA has 16-counters */
+#define HHA_NR_COUNTERS 0x10
+
+#define HHA_PERF_CTRL_EN 0x1
+#define HHA_EVTYPE_NONE 0xff
+
+/*
+ * Select the counter register offset using the counter index
+ * each counter is 48-bits.
+ */
+static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx)
+{
+ return (HHA_CNT0_LOWER + (cntr_idx * 8));
+}
+
+static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+
+ if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) {
+ dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return 0;
+ }
+
+ /* Read 64 bits and like L3C, top 16 bits are RAZ */
+ return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx));
+}
+
+static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ u32 idx = hwc->idx;
+
+ if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) {
+ dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return;
+ }
+
+ /* Write 64 bits and like L3C, top 16 bits are WI */
+ writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx));
+}
+
+static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+ u32 type)
+{
+ u32 reg, reg_idx, shift, val;
+
+ /*
+ * Select the appropriate event select register(HHA_EVENT_TYPEx).
+ * There are 4 event select registers for the 16 hardware counters.
+ * Event code is 8-bits and for the first 4 hardware counters,
+ * HHA_EVENT_TYPE0 is chosen. For the next 4 hardware counters,
+ * HHA_EVENT_TYPE1 is chosen and so on.
+ */
+ reg = HHA_EVENT_TYPE0 + 4 * (idx / 4);
+ reg_idx = idx % 4;
+ shift = 8 * reg_idx;
+
+ /* Write event code to HHA_EVENT_TYPEx register */
+ val = readl(hha_pmu->base + reg);
+ val &= ~(HHA_EVTYPE_NONE << shift);
+ val |= (type << shift);
+ writel(val, hha_pmu->base + reg);
+}
+
+static void hisi_hha_pmu_start_counters(struct hisi_pmu *hha_pmu)
+{
+ u32 val;
+
+ /*
+ * Set perf_enable bit in HHA_PERF_CTRL to start event
+ * counting for all enabled counters.
+ */
+ val = readl(hha_pmu->base + HHA_PERF_CTRL);
+ val |= HHA_PERF_CTRL_EN;
+ writel(val, hha_pmu->base + HHA_PERF_CTRL);
+}
+
+static void hisi_hha_pmu_stop_counters(struct hisi_pmu *hha_pmu)
+{
+ u32 val;
+
+ /*
+ * Clear perf_enable bit in HHA_PERF_CTRL to stop event
+ * counting for all enabled counters.
+ */
+ val = readl(hha_pmu->base + HHA_PERF_CTRL);
+ val &= ~(HHA_PERF_CTRL_EN);
+ writel(val, hha_pmu->base + HHA_PERF_CTRL);
+}
+
+static void hisi_hha_pmu_enable_counter(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Enable counter index in HHA_EVENT_CTRL register */
+ val = readl(hha_pmu->base + HHA_EVENT_CTRL);
+ val |= (1 << hwc->idx);
+ writel(val, hha_pmu->base + HHA_EVENT_CTRL);
+}
+
+static void hisi_hha_pmu_disable_counter(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Clear counter index in HHA_EVENT_CTRL register */
+ val = readl(hha_pmu->base + HHA_EVENT_CTRL);
+ val &= ~(1 << hwc->idx);
+ writel(val, hha_pmu->base + HHA_EVENT_CTRL);
+}
+
+static void hisi_hha_pmu_enable_counter_int(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 0 to enable interrupt */
+ val = readl(hha_pmu->base + HHA_INT_MASK);
+ val &= ~(1 << hwc->idx);
+ writel(val, hha_pmu->base + HHA_INT_MASK);
+}
+
+static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Write 1 to mask interrupt */
+ val = readl(hha_pmu->base + HHA_INT_MASK);
+ val |= (1 << hwc->idx);
+ writel(val, hha_pmu->base + HHA_INT_MASK);
+}
+
+static irqreturn_t hisi_hha_pmu_isr(int irq, void *dev_id)
+{
+ struct hisi_pmu *hha_pmu = dev_id;
+ struct perf_event *event;
+ unsigned long overflown;
+ int idx;
+
+ /* Read HHA_INT_STATUS register */
+ overflown = readl(hha_pmu->base + HHA_INT_STATUS);
+ if (!overflown)
+ return IRQ_NONE;
+
+ /*
+ * Find the counter index which overflowed if the bit was set
+ * and handle it
+ */
+ for_each_set_bit(idx, &overflown, HHA_NR_COUNTERS) {
+ /* Write 1 to clear the IRQ status flag */
+ writel((1 << idx), hha_pmu->base + HHA_INT_CLEAR);
+
+ /* Get the corresponding event struct */
+ event = hha_pmu->pmu_events.hw_events[idx];
+ if (!event)
+ continue;
+
+ hisi_uncore_pmu_event_update(event);
+ hisi_uncore_pmu_set_event_period(event);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu,
+ struct platform_device *pdev)
+{
+ int irq, ret;
+
+ /* Read and init IRQ */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "HHA PMU get irq fail; irq:%d\n", irq);
+ return irq;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_hha_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ dev_name(&pdev->dev), hha_pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "Fail to request IRQ:%d ret:%d\n", irq, ret);
+ return ret;
+ }
+
+ hha_pmu->irq = irq;
+
+ return 0;
+}
+
+static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = {
+ { "HISI0243", },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match);
+
+static int hisi_hha_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *hha_pmu)
+{
+ unsigned long long id;
+ struct resource *res;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+ "_UID", NULL, &id);
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ hha_pmu->index_id = id;
+
+ /*
+ * Use SCCL_ID and UID to identify the HHA PMU, while
+ * SCCL_ID is in MPIDR[aff2].
+ */
+ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+ &hha_pmu->sccl_id)) {
+ dev_err(&pdev->dev, "Can not read hha sccl-id!\n");
+ return -EINVAL;
+ }
+ /* HHA PMUs only share the same SCCL */
+ hha_pmu->ccl_id = -1;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ hha_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(hha_pmu->base)) {
+ dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n");
+ return PTR_ERR(hha_pmu->base);
+ }
+
+ return 0;
+}
+
+static struct attribute *hisi_hha_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_hha_pmu_format_attr,
+};
+
+static struct attribute *hisi_hha_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rx_ops_num, 0x00),
+ HISI_PMU_EVENT_ATTR(rx_outer, 0x01),
+ HISI_PMU_EVENT_ATTR(rx_sccl, 0x02),
+ HISI_PMU_EVENT_ATTR(rx_ccix, 0x03),
+ HISI_PMU_EVENT_ATTR(rx_wbi, 0x04),
+ HISI_PMU_EVENT_ATTR(rx_wbip, 0x05),
+ HISI_PMU_EVENT_ATTR(rx_wtistash, 0x11),
+ HISI_PMU_EVENT_ATTR(rd_ddr_64b, 0x1c),
+ HISI_PMU_EVENT_ATTR(wr_dr_64b, 0x1d),
+ HISI_PMU_EVENT_ATTR(rd_ddr_128b, 0x1e),
+ HISI_PMU_EVENT_ATTR(wr_ddr_128b, 0x1f),
+ HISI_PMU_EVENT_ATTR(spill_num, 0x20),
+ HISI_PMU_EVENT_ATTR(spill_success, 0x21),
+ HISI_PMU_EVENT_ATTR(bi_num, 0x23),
+ HISI_PMU_EVENT_ATTR(mediated_num, 0x32),
+ HISI_PMU_EVENT_ATTR(tx_snp_num, 0x33),
+ HISI_PMU_EVENT_ATTR(tx_snp_outer, 0x34),
+ HISI_PMU_EVENT_ATTR(tx_snp_ccix, 0x35),
+ HISI_PMU_EVENT_ATTR(rx_snprspdata, 0x38),
+ HISI_PMU_EVENT_ATTR(rx_snprsp_outer, 0x3c),
+ HISI_PMU_EVENT_ATTR(sdir-lookup, 0x40),
+ HISI_PMU_EVENT_ATTR(edir-lookup, 0x41),
+ HISI_PMU_EVENT_ATTR(sdir-hit, 0x42),
+ HISI_PMU_EVENT_ATTR(edir-hit, 0x43),
+ HISI_PMU_EVENT_ATTR(sdir-home-migrate, 0x4c),
+ HISI_PMU_EVENT_ATTR(edir-home-migrate, 0x4d),
+ NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_hha_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_hha_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
+ .attrs = hisi_hha_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *hisi_hha_pmu_attr_groups[] = {
+ &hisi_hha_pmu_format_group,
+ &hisi_hha_pmu_events_group,
+ &hisi_hha_pmu_cpumask_attr_group,
+ NULL,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_hha_ops = {
+ .write_evtype = hisi_hha_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_hha_pmu_start_counters,
+ .stop_counters = hisi_hha_pmu_stop_counters,
+ .enable_counter = hisi_hha_pmu_enable_counter,
+ .disable_counter = hisi_hha_pmu_disable_counter,
+ .enable_counter_int = hisi_hha_pmu_enable_counter_int,
+ .disable_counter_int = hisi_hha_pmu_disable_counter_int,
+ .write_counter = hisi_hha_pmu_write_counter,
+ .read_counter = hisi_hha_pmu_read_counter,
+};
+
+static int hisi_hha_pmu_dev_probe(struct platform_device *pdev,
+ struct hisi_pmu *hha_pmu)
+{
+ int ret;
+
+ ret = hisi_hha_pmu_init_data(pdev, hha_pmu);
+ if (ret)
+ return ret;
+
+ ret = hisi_hha_pmu_init_irq(hha_pmu, pdev);
+ if (ret)
+ return ret;
+
+ hha_pmu->num_counters = HHA_NR_COUNTERS;
+ hha_pmu->counter_bits = 48;
+ hha_pmu->ops = &hisi_uncore_hha_ops;
+ hha_pmu->dev = &pdev->dev;
+ hha_pmu->on_cpu = -1;
+ hha_pmu->check_event = 0x65;
+
+ return 0;
+}
+
+static int hisi_hha_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *hha_pmu;
+ char *name;
+ int ret;
+
+ hha_pmu = devm_kzalloc(&pdev->dev, sizeof(*hha_pmu), GFP_KERNEL);
+ if (!hha_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, hha_pmu);
+
+ ret = hisi_hha_pmu_dev_probe(pdev, hha_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ &hha_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+ return ret;
+ }
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
+ hha_pmu->sccl_id, hha_pmu->index_id);
+ hha_pmu->pmu = (struct pmu) {
+ .name = name,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = hisi_uncore_pmu_event_init,
+ .pmu_enable = hisi_uncore_pmu_enable,
+ .pmu_disable = hisi_uncore_pmu_disable,
+ .add = hisi_uncore_pmu_add,
+ .del = hisi_uncore_pmu_del,
+ .start = hisi_uncore_pmu_start,
+ .stop = hisi_uncore_pmu_stop,
+ .read = hisi_uncore_pmu_read,
+ .attr_groups = hisi_hha_pmu_attr_groups,
+ };
+
+ ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
+ if (ret) {
+ dev_err(hha_pmu->dev, "HHA PMU register failed!\n");
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ &hha_pmu->node);
+ }
+
+ return ret;
+}
+
+static int hisi_hha_pmu_remove(struct platform_device *pdev)
+{
+ struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev);
+
+ perf_pmu_unregister(&hha_pmu->pmu);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ &hha_pmu->node);
+
+ return 0;
+}
+
+static struct platform_driver hisi_hha_pmu_driver = {
+ .driver = {
+ .name = "hisi_hha_pmu",
+ .acpi_match_table = ACPI_PTR(hisi_hha_pmu_acpi_match),
+ },
+ .probe = hisi_hha_pmu_probe,
+ .remove = hisi_hha_pmu_remove,
+};
+
+static int __init hisi_hha_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ "AP_PERF_ARM_HISI_HHA_ONLINE",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret) {
+ pr_err("HHA PMU: Error setup hotplug, ret = %d;\n", ret);
+ return ret;
+ }
+
+ ret = platform_driver_register(&hisi_hha_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+
+ return ret;
+}
+module_init(hisi_hha_pmu_module_init);
+
+static void __exit hisi_hha_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_hha_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+}
+module_exit(hisi_hha_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
new file mode 100644
index 000000000000..0bde5d919b2e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -0,0 +1,463 @@
+/*
+ * HiSilicon SoC L3C uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ * Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* L3C register definition */
+#define L3C_PERF_CTRL 0x0408
+#define L3C_INT_MASK 0x0800
+#define L3C_INT_STATUS 0x0808
+#define L3C_INT_CLEAR 0x080c
+#define L3C_EVENT_CTRL 0x1c00
+#define L3C_EVENT_TYPE0 0x1d00
+/*
+ * Each counter is 48-bits and [48:63] are reserved
+ * which are Read-As-Zero and Writes-Ignored.
+ */
+#define L3C_CNTR0_LOWER 0x1e00
+
+/* L3C has 8-counters */
+#define L3C_NR_COUNTERS 0x8
+
+#define L3C_PERF_CTRL_EN 0x20000
+#define L3C_EVTYPE_NONE 0xff
+
+/*
+ * Select the counter register offset using the counter index
+ */
+static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
+{
+ return (L3C_CNTR0_LOWER + (cntr_idx * 8));
+}
+
+static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 idx = hwc->idx;
+
+ if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
+ dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return 0;
+ }
+
+ /* Read 64-bits and the upper 16 bits are RAZ */
+ return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+}
+
+static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc, u64 val)
+{
+ u32 idx = hwc->idx;
+
+ if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
+ dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return;
+ }
+
+ /* Write 64-bits and the upper 16 bits are WI */
+ writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+}
+
+static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
+ u32 type)
+{
+ u32 reg, reg_idx, shift, val;
+
+ /*
+ * Select the appropriate event select register(L3C_EVENT_TYPE0/1).
+ * There are 2 event select registers for the 8 hardware counters.
+ * Event code is 8-bits and for the former 4 hardware counters,
+ * L3C_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+ * L3C_EVENT_TYPE1 is chosen.
+ */
+ reg = L3C_EVENT_TYPE0 + (idx / 4) * 4;
+ reg_idx = idx % 4;
+ shift = 8 * reg_idx;
+
+ /* Write event code to L3C_EVENT_TYPEx Register */
+ val = readl(l3c_pmu->base + reg);
+ val &= ~(L3C_EVTYPE_NONE << shift);
+ val |= (type << shift);
+ writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
+{
+ u32 val;
+
+ /*
+ * Set perf_enable bit in L3C_PERF_CTRL register to start counting
+ * for all enabled counters.
+ */
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val |= L3C_PERF_CTRL_EN;
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
+{
+ u32 val;
+
+ /*
+ * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting
+ * for all enabled counters.
+ */
+ val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+ val &= ~(L3C_PERF_CTRL_EN);
+ writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Enable counter index in L3C_EVENT_CTRL register */
+ val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+ val |= (1 << hwc->idx);
+ writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ /* Clear counter index in L3C_EVENT_CTRL register */
+ val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+ val &= ~(1 << hwc->idx);
+ writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ val = readl(l3c_pmu->base + L3C_INT_MASK);
+ /* Write 0 to enable interrupt */
+ val &= ~(1 << hwc->idx);
+ writel(val, l3c_pmu->base + L3C_INT_MASK);
+}
+
+static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
+ struct hw_perf_event *hwc)
+{
+ u32 val;
+
+ val = readl(l3c_pmu->base + L3C_INT_MASK);
+ /* Write 1 to mask interrupt */
+ val |= (1 << hwc->idx);
+ writel(val, l3c_pmu->base + L3C_INT_MASK);
+}
+
+static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id)
+{
+ struct hisi_pmu *l3c_pmu = dev_id;
+ struct perf_event *event;
+ unsigned long overflown;
+ int idx;
+
+ /* Read L3C_INT_STATUS register */
+ overflown = readl(l3c_pmu->base + L3C_INT_STATUS);
+ if (!overflown)
+ return IRQ_NONE;
+
+ /*
+ * Find the counter index which overflowed if the bit was set
+ * and handle it.
+ */
+ for_each_set_bit(idx, &overflown, L3C_NR_COUNTERS) {
+ /* Write 1 to clear the IRQ status flag */
+ writel((1 << idx), l3c_pmu->base + L3C_INT_CLEAR);
+
+ /* Get the corresponding event struct */
+ event = l3c_pmu->pmu_events.hw_events[idx];
+ if (!event)
+ continue;
+
+ hisi_uncore_pmu_event_update(event);
+ hisi_uncore_pmu_set_event_period(event);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
+ struct platform_device *pdev)
+{
+ int irq, ret;
+
+ /* Read and init IRQ */
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "L3C PMU get irq fail; irq:%d\n", irq);
+ return irq;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ dev_name(&pdev->dev), l3c_pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "Fail to request IRQ:%d ret:%d\n", irq, ret);
+ return ret;
+ }
+
+ l3c_pmu->irq = irq;
+
+ return 0;
+}
+
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+ { "HISI0213", },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
+static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+ struct hisi_pmu *l3c_pmu)
+{
+ unsigned long long id;
+ struct resource *res;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+ "_UID", NULL, &id);
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ l3c_pmu->index_id = id;
+
+ /*
+ * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
+ * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
+ */
+ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+ &l3c_pmu->sccl_id)) {
+ dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
+ return -EINVAL;
+ }
+
+ if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
+ &l3c_pmu->ccl_id)) {
+ dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
+ return -EINVAL;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(l3c_pmu->base)) {
+ dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
+ return PTR_ERR(l3c_pmu->base);
+ }
+
+ return 0;
+}
+
+static struct attribute *hisi_l3c_pmu_format_attr[] = {
+ HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+ NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_l3c_pmu_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_events_attr[] = {
+ HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
+ HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
+ HISI_PMU_EVENT_ATTR(rd_hit_cpipe, 0x02),
+ HISI_PMU_EVENT_ATTR(wr_hit_cpipe, 0x03),
+ HISI_PMU_EVENT_ATTR(victim_num, 0x04),
+ HISI_PMU_EVENT_ATTR(rd_spipe, 0x20),
+ HISI_PMU_EVENT_ATTR(wr_spipe, 0x21),
+ HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x22),
+ HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x23),
+ HISI_PMU_EVENT_ATTR(back_invalid, 0x29),
+ HISI_PMU_EVENT_ATTR(retry_cpu, 0x40),
+ HISI_PMU_EVENT_ATTR(retry_ring, 0x41),
+ HISI_PMU_EVENT_ATTR(prefetch_drop, 0x42),
+ NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_events_group = {
+ .name = "events",
+ .attrs = hisi_l3c_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
+ .attrs = hisi_l3c_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
+ &hisi_l3c_pmu_format_group,
+ &hisi_l3c_pmu_events_group,
+ &hisi_l3c_pmu_cpumask_attr_group,
+ NULL,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
+ .write_evtype = hisi_l3c_pmu_write_evtype,
+ .get_event_idx = hisi_uncore_pmu_get_event_idx,
+ .start_counters = hisi_l3c_pmu_start_counters,
+ .stop_counters = hisi_l3c_pmu_stop_counters,
+ .enable_counter = hisi_l3c_pmu_enable_counter,
+ .disable_counter = hisi_l3c_pmu_disable_counter,
+ .enable_counter_int = hisi_l3c_pmu_enable_counter_int,
+ .disable_counter_int = hisi_l3c_pmu_disable_counter_int,
+ .write_counter = hisi_l3c_pmu_write_counter,
+ .read_counter = hisi_l3c_pmu_read_counter,
+};
+
+static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
+ struct hisi_pmu *l3c_pmu)
+{
+ int ret;
+
+ ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
+ if (ret)
+ return ret;
+
+ ret = hisi_l3c_pmu_init_irq(l3c_pmu, pdev);
+ if (ret)
+ return ret;
+
+ l3c_pmu->num_counters = L3C_NR_COUNTERS;
+ l3c_pmu->counter_bits = 48;
+ l3c_pmu->ops = &hisi_uncore_l3c_ops;
+ l3c_pmu->dev = &pdev->dev;
+ l3c_pmu->on_cpu = -1;
+ l3c_pmu->check_event = 0x59;
+
+ return 0;
+}
+
+static int hisi_l3c_pmu_probe(struct platform_device *pdev)
+{
+ struct hisi_pmu *l3c_pmu;
+ char *name;
+ int ret;
+
+ l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
+ if (!l3c_pmu)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, l3c_pmu);
+
+ ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+ &l3c_pmu->node);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+ return ret;
+ }
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
+ l3c_pmu->sccl_id, l3c_pmu->index_id);
+ l3c_pmu->pmu = (struct pmu) {
+ .name = name,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = hisi_uncore_pmu_event_init,
+ .pmu_enable = hisi_uncore_pmu_enable,
+ .pmu_disable = hisi_uncore_pmu_disable,
+ .add = hisi_uncore_pmu_add,
+ .del = hisi_uncore_pmu_del,
+ .start = hisi_uncore_pmu_start,
+ .stop = hisi_uncore_pmu_stop,
+ .read = hisi_uncore_pmu_read,
+ .attr_groups = hisi_l3c_pmu_attr_groups,
+ };
+
+ ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
+ if (ret) {
+ dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+ &l3c_pmu->node);
+ }
+
+ return ret;
+}
+
+static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+{
+ struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
+
+ perf_pmu_unregister(&l3c_pmu->pmu);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+ &l3c_pmu->node);
+
+ return 0;
+}
+
+static struct platform_driver hisi_l3c_pmu_driver = {
+ .driver = {
+ .name = "hisi_l3c_pmu",
+ .acpi_match_table = ACPI_PTR(hisi_l3c_pmu_acpi_match),
+ },
+ .probe = hisi_l3c_pmu_probe,
+ .remove = hisi_l3c_pmu_remove,
+};
+
+static int __init hisi_l3c_pmu_module_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+ "AP_PERF_ARM_HISI_L3_ONLINE",
+ hisi_uncore_pmu_online_cpu,
+ hisi_uncore_pmu_offline_cpu);
+ if (ret) {
+ pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
+ return ret;
+ }
+
+ ret = platform_driver_register(&hisi_l3c_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+
+ return ret;
+}
+module_init(hisi_l3c_pmu_module_init);
+
+static void __exit hisi_l3c_pmu_module_exit(void)
+{
+ platform_driver_unregister(&hisi_l3c_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+}
+module_exit(hisi_l3c_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
new file mode 100644
index 000000000000..7ed24b954422
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -0,0 +1,447 @@
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ * Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+
+#include <asm/local64.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+
+/*
+ * PMU format attributes
+ */
+ssize_t hisi_format_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+ return sprintf(buf, "%s\n", (char *)eattr->var);
+}
+
+/*
+ * PMU event attributes
+ */
+ssize_t hisi_event_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+ return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+/*
+ * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
+ */
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+ return sprintf(buf, "%d\n", hisi_pmu->on_cpu);
+}
+
+static bool hisi_validate_event_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ /* Include count for the event */
+ int counters = 1;
+
+ if (!is_software_event(leader)) {
+ /*
+ * We must NOT create groups containing mixed PMUs, although
+ * software events are acceptable
+ */
+ if (leader->pmu != event->pmu)
+ return false;
+
+ /* Increment counter for the leader */
+ if (leader != event)
+ counters++;
+ }
+
+ list_for_each_entry(sibling, &event->group_leader->sibling_list,
+ group_entry) {
+ if (is_software_event(sibling))
+ continue;
+ if (sibling->pmu != event->pmu)
+ return false;
+ /* Increment counter for each sibling */
+ counters++;
+ }
+
+ /* The group can not count events more than the counters in the HW */
+ return counters <= hisi_pmu->num_counters;
+}
+
+int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
+{
+ return idx >= 0 && idx < hisi_pmu->num_counters;
+}
+
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ unsigned long *used_mask = hisi_pmu->pmu_events.used_mask;
+ u32 num_counters = hisi_pmu->num_counters;
+ int idx;
+
+ idx = find_first_zero_bit(used_mask, num_counters);
+ if (idx == num_counters)
+ return -EAGAIN;
+
+ set_bit(idx, used_mask);
+
+ return idx;
+}
+
+static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
+{
+ if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
+ dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx);
+ return;
+ }
+
+ clear_bit(idx, hisi_pmu->pmu_events.used_mask);
+}
+
+int hisi_uncore_pmu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hisi_pmu *hisi_pmu;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * We do not support sampling as the counters are all
+ * shared by all CPU cores in a CPU die(SCCL). Also we
+ * do not support attach to a task(per-process mode)
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ /* counters do not have these bits */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle)
+ return -EINVAL;
+
+ /*
+ * The uncore counters not specific to any CPU, so cannot
+ * support per-task
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ /*
+ * Validate if the events in group does not exceed the
+ * available counters in hardware.
+ */
+ if (!hisi_validate_event_group(event))
+ return -EINVAL;
+
+ hisi_pmu = to_hisi_pmu(event->pmu);
+ if (event->attr.config > hisi_pmu->check_event)
+ return -EINVAL;
+
+ if (hisi_pmu->on_cpu == -1)
+ return -EINVAL;
+ /*
+ * We don't assign an index until we actually place the event onto
+ * hardware. Use -1 to signify that we haven't decided where to put it
+ * yet.
+ */
+ hwc->idx = -1;
+ hwc->config_base = event->attr.config;
+
+ /* Enforce to use the same CPU for all events in this PMU */
+ event->cpu = hisi_pmu->on_cpu;
+
+ return 0;
+}
+
+/*
+ * Set the counter to count the event that we're interested in,
+ * and enable interrupt and counter.
+ */
+static void hisi_uncore_pmu_enable_event(struct perf_event *event)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
+ HISI_GET_EVENTID(event));
+
+ hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
+ hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
+}
+
+/*
+ * Disable counter and interrupt.
+ */
+static void hisi_uncore_pmu_disable_event(struct perf_event *event)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
+ hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
+}
+
+void hisi_uncore_pmu_set_event_period(struct perf_event *event)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * The HiSilicon PMU counters support 32 bits or 48 bits, depending on
+ * the PMU. We reduce it to 2^(counter_bits - 1) to account for the
+ * extreme interrupt latency. So we could hopefully handle the overflow
+ * interrupt before another 2^(counter_bits - 1) events occur and the
+ * counter overtakes its previous value.
+ */
+ u64 val = BIT_ULL(hisi_pmu->counter_bits - 1);
+
+ local64_set(&hwc->prev_count, val);
+ /* Write start value to the hardware event counter */
+ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
+}
+
+void hisi_uncore_pmu_event_update(struct perf_event *event)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_raw_count, new_raw_count;
+
+ do {
+ /* Read the count from the counter register */
+ new_raw_count = hisi_pmu->ops->read_counter(hisi_pmu, hwc);
+ prev_raw_count = local64_read(&hwc->prev_count);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+ /*
+ * compute the delta
+ */
+ delta = (new_raw_count - prev_raw_count) &
+ HISI_MAX_PERIOD(hisi_pmu->counter_bits);
+ local64_add(delta, &event->count);
+}
+
+void hisi_uncore_pmu_start(struct perf_event *event, int flags)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+ hisi_uncore_pmu_set_event_period(event);
+
+ if (flags & PERF_EF_RELOAD) {
+ u64 prev_raw_count = local64_read(&hwc->prev_count);
+
+ hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count);
+ }
+
+ hisi_uncore_pmu_enable_event(event);
+ perf_event_update_userpage(event);
+}
+
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_uncore_pmu_disable_event(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /* Read hardware counter and update the perf counter statistics */
+ hisi_uncore_pmu_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+int hisi_uncore_pmu_add(struct perf_event *event, int flags)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ /* Get an available counter index for counting */
+ idx = hisi_pmu->ops->get_event_idx(event);
+ if (idx < 0)
+ return idx;
+
+ event->hw.idx = idx;
+ hisi_pmu->pmu_events.hw_events[idx] = event;
+
+ if (flags & PERF_EF_START)
+ hisi_uncore_pmu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+void hisi_uncore_pmu_del(struct perf_event *event, int flags)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hisi_uncore_pmu_stop(event, PERF_EF_UPDATE);
+ hisi_uncore_pmu_clear_event_idx(hisi_pmu, hwc->idx);
+ perf_event_update_userpage(event);
+ hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
+}
+
+void hisi_uncore_pmu_read(struct perf_event *event)
+{
+ /* Read hardware counter and update the perf counter statistics */
+ hisi_uncore_pmu_event_update(event);
+}
+
+void hisi_uncore_pmu_enable(struct pmu *pmu)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+ int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
+ hisi_pmu->num_counters);
+
+ if (!enabled)
+ return;
+
+ hisi_pmu->ops->start_counters(hisi_pmu);
+}
+
+void hisi_uncore_pmu_disable(struct pmu *pmu)
+{
+ struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+ hisi_pmu->ops->stop_counters(hisi_pmu);
+}
+
+/*
+ * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
+ * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID
+ * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is
+ * in MPIDR[aff1]. If this changes in future, this shall be updated.
+ */
+static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
+{
+ u64 mpidr = read_cpuid_mpidr();
+
+ if (mpidr & MPIDR_MT_BITMASK) {
+ if (sccl_id)
+ *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+ if (ccl_id)
+ *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+ } else {
+ if (sccl_id)
+ *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+ if (ccl_id)
+ *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ }
+}
+
+/*
+ * Check whether the CPU is associated with this uncore PMU
+ */
+static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
+{
+ int sccl_id, ccl_id;
+
+ if (hisi_pmu->ccl_id == -1) {
+ /* If CCL_ID is -1, the PMU only shares the same SCCL */
+ hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
+
+ return sccl_id == hisi_pmu->sccl_id;
+ }
+
+ hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
+
+ return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+}
+
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+ node);
+
+ if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+ return 0;
+
+ cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
+
+ /* If another CPU is already managing this PMU, simply return. */
+ if (hisi_pmu->on_cpu != -1)
+ return 0;
+
+ /* Use this CPU in cpumask for event counting */
+ hisi_pmu->on_cpu = cpu;
+
+ /* Overflow interrupt also should use the same CPU */
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+
+ return 0;
+}
+
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+ node);
+ cpumask_t pmu_online_cpus;
+ unsigned int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
+ return 0;
+
+ /* Nothing to do if this CPU doesn't own the PMU */
+ if (hisi_pmu->on_cpu != cpu)
+ return 0;
+
+ /* Give up ownership of the PMU */
+ hisi_pmu->on_cpu = -1;
+
+ /* Choose a new CPU to migrate ownership of the PMU to */
+ cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
+ cpu_online_mask);
+ target = cpumask_any_but(&pmu_online_cpus, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
+ /* Use this CPU for event counting */
+ hisi_pmu->on_cpu = target;
+ WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+
+ return 0;
+}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
new file mode 100644
index 000000000000..f21226a0e9c6
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -0,0 +1,102 @@
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ * Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __HISI_UNCORE_PMU_H__
+#define __HISI_UNCORE_PMU_H__
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "hisi_pmu: " fmt
+
+#define HISI_MAX_COUNTERS 0x10
+#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
+
+#define HISI_PMU_ATTR(_name, _func, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { __ATTR(_name, 0444, _func, NULL), (void *)_config } \
+ })[0].attr.attr)
+
+#define HISI_PMU_FORMAT_ATTR(_name, _config) \
+ HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
+#define HISI_PMU_EVENT_ATTR(_name, _config) \
+ HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
+
+struct hisi_pmu;
+
+struct hisi_uncore_ops {
+ void (*write_evtype)(struct hisi_pmu *, int, u32);
+ int (*get_event_idx)(struct perf_event *);
+ u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
+ void (*write_counter)(struct hisi_pmu *, struct hw_perf_event *, u64);
+ void (*enable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+ void (*disable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+ void (*enable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+ void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+ void (*start_counters)(struct hisi_pmu *);
+ void (*stop_counters)(struct hisi_pmu *);
+};
+
+struct hisi_pmu_hwevents {
+ struct perf_event *hw_events[HISI_MAX_COUNTERS];
+ DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+};
+
+/* Generic pmu struct for different pmu types */
+struct hisi_pmu {
+ struct pmu pmu;
+ const struct hisi_uncore_ops *ops;
+ struct hisi_pmu_hwevents pmu_events;
+ /* associated_cpus: All CPUs associated with the PMU */
+ cpumask_t associated_cpus;
+ /* CPU used for counting */
+ int on_cpu;
+ int irq;
+ struct device *dev;
+ struct hlist_node node;
+ int sccl_id;
+ int ccl_id;
+ void __iomem *base;
+ /* the ID of the PMU modules */
+ u32 index_id;
+ int num_counters;
+ int counter_bits;
+ /* check event code range */
+ int check_event;
+};
+
+int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
+void hisi_uncore_pmu_read(struct perf_event *event);
+int hisi_uncore_pmu_add(struct perf_event *event, int flags);
+void hisi_uncore_pmu_del(struct perf_event *event, int flags);
+void hisi_uncore_pmu_start(struct perf_event *event, int flags);
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags);
+void hisi_uncore_pmu_set_event_period(struct perf_event *event);
+void hisi_uncore_pmu_event_update(struct perf_event *event);
+int hisi_uncore_pmu_event_init(struct perf_event *event);
+void hisi_uncore_pmu_enable(struct pmu *pmu);
+void hisi_uncore_pmu_disable(struct pmu *pmu);
+ssize_t hisi_event_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf);
+ssize_t hisi_format_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf);
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *buf);
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
+#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index b242cce10468..4fdc8486a8e4 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -92,6 +92,21 @@
#define reg_idx(reg, i) (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
+/*
+ * Events
+ */
+#define L2_EVENT_CYCLES 0xfe
+#define L2_EVENT_DCACHE_OPS 0x400
+#define L2_EVENT_ICACHE_OPS 0x401
+#define L2_EVENT_TLBI 0x402
+#define L2_EVENT_BARRIERS 0x403
+#define L2_EVENT_TOTAL_READS 0x405
+#define L2_EVENT_TOTAL_WRITES 0x406
+#define L2_EVENT_TOTAL_REQUESTS 0x407
+#define L2_EVENT_LDREX 0x420
+#define L2_EVENT_STREX 0x421
+#define L2_EVENT_CLREX 0x422
+
static DEFINE_RAW_SPINLOCK(l2_access_lock);
/**
@@ -700,9 +715,12 @@ static struct attribute_group l2_cache_pmu_cpumask_group = {
/* CCG format for perf RAW codes. */
PMU_FORMAT_ATTR(l2_code, "config:4-11");
PMU_FORMAT_ATTR(l2_group, "config:0-3");
+PMU_FORMAT_ATTR(event, "config:0-11");
+
static struct attribute *l2_cache_pmu_formats[] = {
&format_attr_l2_code.attr,
&format_attr_l2_group.attr,
+ &format_attr_event.attr,
NULL,
};
@@ -711,9 +729,45 @@ static struct attribute_group l2_cache_pmu_format_group = {
.attrs = l2_cache_pmu_formats,
};
+static ssize_t l2cache_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L2CACHE_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *l2_cache_pmu_events[] = {
+ L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
+ L2CACHE_EVENT_ATTR(dcache-ops, L2_EVENT_DCACHE_OPS),
+ L2CACHE_EVENT_ATTR(icache-ops, L2_EVENT_ICACHE_OPS),
+ L2CACHE_EVENT_ATTR(tlbi, L2_EVENT_TLBI),
+ L2CACHE_EVENT_ATTR(barriers, L2_EVENT_BARRIERS),
+ L2CACHE_EVENT_ATTR(total-reads, L2_EVENT_TOTAL_READS),
+ L2CACHE_EVENT_ATTR(total-writes, L2_EVENT_TOTAL_WRITES),
+ L2CACHE_EVENT_ATTR(total-requests, L2_EVENT_TOTAL_REQUESTS),
+ L2CACHE_EVENT_ATTR(ldrex, L2_EVENT_LDREX),
+ L2CACHE_EVENT_ATTR(strex, L2_EVENT_STREX),
+ L2CACHE_EVENT_ATTR(clrex, L2_EVENT_CLREX),
+ NULL
+};
+
+static struct attribute_group l2_cache_pmu_events_group = {
+ .name = "events",
+ .attrs = l2_cache_pmu_events,
+};
+
static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
&l2_cache_pmu_format_group,
&l2_cache_pmu_cpumask_group,
+ &l2_cache_pmu_events_group,
NULL,
};
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 73b01e474fdc..c697882b3aba 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1699,7 +1699,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
long signr, size_t *total)
{
unsigned int i;
- unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
+ unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
/*
* NT_PRSTATUS is the one special case, because the regset data
@@ -1708,11 +1708,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
* We assume that regset 0 is NT_PRSTATUS.
*/
fill_prstatus(&t->prstatus, t->task, signr);
- (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+ (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
&t->prstatus.pr_reg, NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1728,7 +1728,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(t->task, regset))) {
int ret;
- size_t size = regset->n * regset->size;
+ size_t size = regset_size(t->task, regset);
void *data = kmalloc(size, GFP_KERNEL);
if (unlikely(!data))
return 0;
@@ -1743,7 +1743,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
size, data);
else {
SET_PR_FPVALID(&t->prstatus,
- 1, regset_size);
+ 1, regset0_size);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cc805b72994a..349e5957c949 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -67,7 +67,9 @@ enum arch_timer_spi_nr {
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
-#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
+#define ARCH_TIMER_EVT_STREAM_PERIOD_US 100
+#define ARCH_TIMER_EVT_STREAM_FREQ \
+ (USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)
struct arch_timer_kvm_info {
struct timecounter timecounter;
@@ -93,6 +95,7 @@ struct arch_timer_mem {
extern u32 arch_timer_get_rate(void);
extern u64 (*arch_timer_read_counter)(void);
extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
+extern bool arch_timer_evtstrm_available(void);
#else
@@ -106,6 +109,11 @@ static inline u64 arch_timer_read_counter(void)
return 0;
}
+static inline bool arch_timer_evtstrm_available(void)
+{
+ return false;
+}
+
#endif
#endif
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 8d3f0bf80379..2f7a29242b87 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -49,8 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { }
/* IOMMU interface */
static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
u64 *size) { }
-static inline
-const struct iommu_ops *iort_iommu_configure(struct device *dev)
+static inline const struct iommu_ops *iort_iommu_configure(
+ struct device *dev)
{ return NULL; }
#endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ec32c4c5eb30..201ab7267986 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -155,6 +155,9 @@ enum cpuhp_state {
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
CPUHP_AP_PERF_ARM_CCN_ONLINE,
+ CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+ CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+ CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 60e3100b0809..dd418955962b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -246,6 +246,14 @@ static inline int irq_is_percpu(unsigned int irq)
return desc->status_use_accessors & IRQ_PER_CPU;
}
+static inline int irq_is_percpu_devid(unsigned int irq)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ return desc->status_use_accessors & IRQ_PER_CPU_DEVID;
+}
+
static inline void
irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
{
diff --git a/include/linux/regset.h b/include/linux/regset.h
index 8e0c9febf495..494cedaafdf2 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -107,6 +107,28 @@ typedef int user_regset_writeback_fn(struct task_struct *target,
int immediate);
/**
+ * user_regset_get_size_fn - type of @get_size function in &struct user_regset
+ * @target: thread being examined
+ * @regset: regset being examined
+ *
+ * This call is optional; usually the pointer is %NULL.
+ *
+ * When provided, this function must return the current size of regset
+ * data, as observed by the @get function in &struct user_regset. The
+ * value returned must be a multiple of @size. The returned size is
+ * required to be valid only until the next time (if any) @regset is
+ * modified for @target.
+ *
+ * This function is intended for dynamically sized regsets. A regset
+ * that is statically sized does not need to implement it.
+ *
+ * This function should not be called directly: instead, callers should
+ * call regset_size() to determine the current size of a regset.
+ */
+typedef unsigned int user_regset_get_size_fn(struct task_struct *target,
+ const struct user_regset *regset);
+
+/**
* struct user_regset - accessible thread CPU state
* @n: Number of slots (registers).
* @size: Size in bytes of a slot (register).
@@ -117,19 +139,33 @@ typedef int user_regset_writeback_fn(struct task_struct *target,
* @set: Function to store values.
* @active: Function to report if regset is active, or %NULL.
* @writeback: Function to write data back to user memory, or %NULL.
+ * @get_size: Function to return the regset's size, or %NULL.
*
* This data structure describes a machine resource we call a register set.
* This is part of the state of an individual thread, not necessarily
* actual CPU registers per se. A register set consists of a number of
* similar slots, given by @n. Each slot is @size bytes, and aligned to
- * @align bytes (which is at least @size).
+ * @align bytes (which is at least @size). For dynamically-sized
+ * regsets, @n must contain the maximum possible number of slots for the
+ * regset, and @get_size must point to a function that returns the
+ * current regset size.
*
- * These functions must be called only on the current thread or on a
- * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are
- * guaranteed will not be woken up and return to user mode, and that we
- * have called wait_task_inactive() on. (The target thread always might
- * wake up for SIGKILL while these functions are working, in which case
- * that thread's user_regset state might be scrambled.)
+ * Callers that need to know only the current size of the regset and do
+ * not care about its internal structure should call regset_size()
+ * instead of inspecting @n or calling @get_size.
+ *
+ * For backward compatibility, the @get and @set methods must pad to, or
+ * accept, @n * @size bytes, even if the current regset size is smaller.
+ * The precise semantics of these operations depend on the regset being
+ * accessed.
+ *
+ * The functions to which &struct user_regset members point must be
+ * called only on the current thread or on a thread that is in
+ * %TASK_STOPPED or %TASK_TRACED state, that we are guaranteed will not
+ * be woken up and return to user mode, and that we have called
+ * wait_task_inactive() on. (The target thread always might wake up for
+ * SIGKILL while these functions are working, in which case that
+ * thread's user_regset state might be scrambled.)
*
* The @pos argument must be aligned according to @align; the @count
* argument must be a multiple of @size. These functions are not
@@ -156,6 +192,7 @@ struct user_regset {
user_regset_set_fn *set;
user_regset_active_fn *active;
user_regset_writeback_fn *writeback;
+ user_regset_get_size_fn *get_size;
unsigned int n;
unsigned int size;
unsigned int align;
@@ -371,5 +408,21 @@ static inline int copy_regset_from_user(struct task_struct *target,
return regset->set(target, regset, offset, size, NULL, data);
}
+/**
+ * regset_size - determine the current size of a regset
+ * @target: thread to be examined
+ * @regset: regset to be examined
+ *
+ * Note that the returned size is valid only until the next time
+ * (if any) @regset is modified for @target.
+ */
+static inline unsigned int regset_size(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!regset->get_size)
+ return regset->n * regset->size;
+ else
+ return regset->get_size(target, regset);
+}
#endif /* <linux/regset.h> */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e09a4f963dc0..bb6836986200 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -418,6 +418,7 @@ typedef struct elf64_shdr {
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */
#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */
#define NT_METAG_TLS 0x502 /* Metag TLS pointer */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 362493a2f950..b9a4953018ed 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -942,6 +942,7 @@ enum perf_callchain_context {
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index b640071421f7..af5f8c2df87a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -198,4 +198,13 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL 50 /* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL 51 /* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK 0xffff
+# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index f3e37971c842..141aa2ca8728 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -411,6 +411,7 @@ err:
return NULL;
}
+EXPORT_SYMBOL_GPL(perf_aux_output_begin);
static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
{
@@ -480,6 +481,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
rb_free_aux(rb);
ring_buffer_put(rb);
}
+EXPORT_SYMBOL_GPL(perf_aux_output_end);
/*
* Skip over a given number of bytes in the AUX buffer, due to, for example,
@@ -505,6 +507,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
return 0;
}
+EXPORT_SYMBOL_GPL(perf_aux_output_skip);
void *perf_get_aux(struct perf_output_handle *handle)
{
@@ -514,6 +517,7 @@ void *perf_get_aux(struct perf_output_handle *handle)
return handle->rb->aux_priv;
}
+EXPORT_SYMBOL_GPL(perf_get_aux);
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f2edcf85780d..49b54e9979cc 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -862,6 +862,7 @@ int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity)
return 0;
}
+EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition);
void kstat_incr_irq_this_cpu(unsigned int irq)
{
diff --git a/kernel/sys.c b/kernel/sys.c
index 524a4cb9bbe2..83ffd7dccf23 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -111,6 +111,12 @@
#ifndef SET_FP_MODE
# define SET_FP_MODE(a,b) (-EINVAL)
#endif
+#ifndef SVE_SET_VL
+# define SVE_SET_VL(a) (-EINVAL)
+#endif
+#ifndef SVE_GET_VL
+# define SVE_GET_VL() (-EINVAL)
+#endif
/*
* this is where the system-wide overflow UID and GID are defined, for
@@ -2386,6 +2392,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_SVE_SET_VL:
+ error = SVE_SET_VL(arg2);
+ break;
+ case PR_SVE_GET_VL:
+ error = SVE_GET_VL();
+ break;
default:
error = -EINVAL;
break;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 95cba0799828..4cf9b91e6c9b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -652,6 +652,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
preempt_disable();
+ /* Flush FP/SIMD state that can't survive guest entry/exit */
+ kvm_fpsimd_flush_cpu_state();
+
kvm_pmu_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);