97 files changed, 7399 insertions, 601 deletions
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index dad411d635d8..bd9b3faab2c4 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,10 +110,20 @@ infrastructure:
      x--------------------------------------------------x
      | Name                         |  bits   | visible |
      |--------------------------------------------------|
-     | RES0                         | [63-32] |    n    |
+     | RES0                         | [63-48] |    n    |
+     |--------------------------------------------------|
+     | DP                           | [47-44] |    y    |
+     |--------------------------------------------------|
+     | SM4                          | [43-40] |    y    |
+     |--------------------------------------------------|
+     | SM3                          | [39-36] |    y    |
+     |--------------------------------------------------|
+     | SHA3                         | [35-32] |    y    |
      |--------------------------------------------------|
      | RDM                          | [31-28] |    y    |
      |--------------------------------------------------|
+     | RES0                         | [27-24] |    n    |
+     |--------------------------------------------------|
      | ATOMICS                      | [23-20] |    y    |
      |--------------------------------------------------|
      | CRC32                        | [19-16] |    y    |
@@ -132,7 +142,11 @@ infrastructure:
      x--------------------------------------------------x
      | Name                         |  bits   | visible |
      |--------------------------------------------------|
-     | RES0                         | [63-28] |    n    |
+     | RES0                         | [63-36] |    n    |
+     |--------------------------------------------------|
+     | SVE                          | [35-32] |    y    |
+     |--------------------------------------------------|
+     | RES0                         | [31-28] |    n    |
      |--------------------------------------------------|
      | GIC                          | [27-24] |    n    |
      |--------------------------------------------------|
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
new file mode 100644
index 000000000000..89edba12a9e0
--- /dev/null
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -0,0 +1,160 @@
+ARM64 ELF hwcaps
+================
+
+This document describes the usage and semantics of the arm64 ELF hwcaps.
+
+
+1. Introduction
+---------------
+
+Some hardware or software features are only available on some CPU
+implementations, and/or with certain kernel configurations, but have no
+architected discovery mechanism available to userspace code at EL0. The
+kernel exposes the presence of these features to userspace through a set
+of flags called hwcaps, exposed in the auxiliary vector.
+
+Userspace software can test for features by acquiring the AT_HWCAP entry
+of the auxiliary vector, and testing whether the relevant flags are
+set, e.g.
+
+bool floating_point_is_present(void)
+{
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+	if (hwcaps & HWCAP_FP)
+		return true;
+
+	return false;
+}
+
+Where software relies on a feature described by a hwcap, it should check
+the relevant hwcap flag to verify that the feature is present before
+attempting to make use of the feature.
+
+Features cannot be probed reliably through other means. When a feature
+is not available, attempting to use it may result in unpredictable
+behaviour, and is not guaranteed to result in any reliable indication
+that the feature is unavailable, such as a SIGILL.
+
+
+2. Interpretation of hwcaps
+---------------------------
+
+The majority of hwcaps are intended to indicate the presence of features
+which are described by architected ID registers inaccessible to
+userspace code at EL0. These hwcaps are defined in terms of ID register
+fields, and should be interpreted with reference to the definition of
+these fields in the ARM Architecture Reference Manual (ARM ARM).
+
+Such hwcaps are described below in the form:
+
+    Functionality implied by idreg.field == val.
+
+Such hwcaps indicate the availability of functionality that the ARM ARM
+defines as being present when idreg.field has value val, but do not
+indicate that idreg.field is precisely equal to val, nor do they
+indicate the absence of functionality implied by other values of
+idreg.field.
+
+Other hwcaps may indicate the presence of features which cannot be
+described by ID registers alone. These may be described without
+reference to ID registers, and may refer to other documentation.
+
+
+3. The hwcaps exposed in AT_HWCAP
+---------------------------------
+
+HWCAP_FP
+
+    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0000.
+
+HWCAP_ASIMD
+
+    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0000.
+
+HWCAP_EVTSTRM
+
+    The generic timer is configured to generate events at a frequency of
+    approximately 10KHz.
+
+HWCAP_AES
+
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
+
+HWCAP_PMULL
+
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
+
+HWCAP_SHA1
+
+    Functionality implied by ID_AA64ISAR0_EL1.SHA1 == 0b0001.
+
+HWCAP_SHA2
+
+    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0001.
+
+HWCAP_CRC32
+
+    Functionality implied by ID_AA64ISAR0_EL1.CRC32 == 0b0001.
+
+HWCAP_ATOMICS
+
+    Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0010.
+
+HWCAP_FPHP
+
+    Functionality implied by ID_AA64PFR0_EL1.FP == 0b0001.
+
+HWCAP_ASIMDHP
+
+    Functionality implied by ID_AA64PFR0_EL1.AdvSIMD == 0b0001.
+
+HWCAP_CPUID
+
+    EL0 access to certain ID registers is available, to the extent
+    described by Documentation/arm64/cpu-feature-registers.txt.
+
+    These ID registers may imply the availability of features.
+
+HWCAP_ASIMDRDM
+
+    Functionality implied by ID_AA64ISAR0_EL1.RDM == 0b0001.
+
+HWCAP_JSCVT
+
+    Functionality implied by ID_AA64ISAR1_EL1.JSCVT == 0b0001.
+
+HWCAP_FCMA
+
+    Functionality implied by ID_AA64ISAR1_EL1.FCMA == 0b0001.
+
+HWCAP_LRCPC
+
+    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0001.
+
+HWCAP_DCPOP
+
+    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
+
+HWCAP_SHA3
+
+    Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
+
+HWCAP_SM3
+
+    Functionality implied by ID_AA64ISAR0_EL1.SM3 == 0b0001.
+
+HWCAP_SM4
+
+    Functionality implied by ID_AA64ISAR0_EL1.SM4 == 0b0001.
+
+HWCAP_ASIMDDP
+
+    Functionality implied by ID_AA64ISAR0_EL1.DP == 0b0001.
+
+HWCAP_SHA512
+
+    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
+
+HWCAP_SVE
+
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index d7273a5f6456..671bc0639262 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -86,9 +86,9 @@ Translation table lookup with 64KB pages:
  +-------------------------------------------------> [63] TTBR0/1
 
 
-When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
-offset from the kernel VA (top 24bits of the kernel VA set to zero):
+When using KVM without the Virtualization Host Extensions, the hypervisor
+maps kernel pages in EL2 at a fixed offset from the kernel VA. See the
+kern_hyp_va macro for more details.
 
-Start			End			Size		Use
------------------------------------------------------------------------
-0000004000000000	0000007fffffffff	 256GB		kernel objects mapped in HYP
+When using KVM with the Virtualization Host Extensions, no additional
+mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
new file mode 100644
index 000000000000..f128f736b4a5
--- /dev/null
+++ b/Documentation/arm64/sve.txt
@@ -0,0 +1,508 @@
+            Scalable Vector Extension support for AArch64 Linux
+            ===================================================
+
+Author: Dave Martin <Dave.Martin@arm.com>
+Date:   4 August 2017
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Scalable Vector Extension (SVE).
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+This document does not aim to describe the SVE architecture or programmer's
+model.  To aid understanding, a minimal description of relevant programmer's
+model features for SVE is included in Appendix A.
+
+
+1.  General
+-----------
+
+* SVE registers Z0..Z31, P0..P15 and FFR and the current vector length VL, are
+  tracked per-thread.
+
+* The presence of SVE is reported to userspace via HWCAP_SVE in the aux vector
+  AT_HWCAP entry.  Presence of this flag implies the presence of the SVE
+  instructions and registers, and the Linux-specific system interfaces
+  described in this document.  SVE is reported in /proc/cpuinfo as "sve".
+
+* Support for the execution of SVE instructions in userspace can also be
+  detected by reading the CPU ID register ID_AA64PFR0_EL1 using an MRS
+  instruction, and checking that the value of the SVE field is nonzero. [3]
+
+  It does not guarantee the presence of the system interfaces described in the
+  following sections: software that needs to verify that those interfaces are
+  present must check for HWCAP_SVE instead.
+
+* Debuggers should restrict themselves to interacting with the target via the
+  NT_ARM_SVE regset.  The recommended way of detecting support for this regset
+  is to connect to a target process first and then attempt a
+  ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
+
+
+2.  Vector length terminology
+-----------------------------
+
+The size of an SVE vector (Z) register is referred to as the "vector length".
+
+To avoid confusion about the units used to express vector length, the kernel
+adopts the following conventions:
+
+* Vector length (VL) = size of a Z-register in bytes
+
+* Vector quadwords (VQ) = size of a Z-register in units of 128 bits
+
+(So, VL = 16 * VQ.)
+
+The VQ convention is used where the underlying granularity is important, such
+as in data structure definitions.  In most other situations, the VL convention
+is used.  This is consistent with the meaning of the "VL" pseudo-register in
+the SVE instruction set architecture.
+
+
+3.  System call behaviour
+-------------------------
+
+* On syscall, V0..V31 are preserved (as without SVE).  Thus, bits [127:0] of
+  Z0..Z31 are preserved.  All other bits of Z0..Z31, and all of P0..P15 and FFR
+  become unspecified on return from a syscall.
+
+* The SVE registers are not used to pass arguments to or receive results from
+  any syscall.
+
+* In practice the affected registers/bits will be preserved or will be replaced
+  with zeros on return from a syscall, but userspace should not make
+  assumptions about this.  The kernel behaviour may vary on a case-by-case
+  basis.
+
+* All other SVE state of a thread, including the currently configured vector
+  length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
+  length (if any), is preserved across all syscalls, subject to the specific
+  exceptions for execve() described in section 6.
+
+  In particular, on return from a fork() or clone(), the parent and new child
+  process or thread share identical SVE configuration, matching that of the
+  parent before the call.
+
+
+4.  Signal handling
+-------------------
+
+* A new signal frame record sve_context encodes the SVE registers on signal
+  delivery. [1]
+
+* This record is supplementary to fpsimd_context.  The FPSR and FPCR registers
+  are only present in fpsimd_context.  For convenience, the content of V0..V31
+  is duplicated between sve_context and fpsimd_context.
+
+* The signal frame record for SVE always contains basic metadata, in particular
+  the thread's vector length (in sve_context.vl).
+
+* The SVE registers may or may not be included in the record, depending on
+  whether the registers are live for the thread.  The registers are present if
+  and only if:
+  sve_context.head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)).
+
+* If the registers are present, the remainder of the record has a vl-dependent
+  size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
+  the members.
+
+* If the SVE context is too big to fit in sigcontext.__reserved[], then extra
+  space is allocated on the stack and an extra_context record is written in
+  __reserved[] referencing this space.  sve_context is then written in the
+  extra space.  Refer to [1] for further details about this mechanism.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is no sve_context record in the signal frame, or if the record is
+  present but contains no register data as described in the previous section,
+  then the SVE registers/bits become non-live and take unspecified values.
+
+* If sve_context is present in the signal frame and contains full register
+  data, the SVE registers become live and are populated with the specified
+  data.  However, for backward compatibility reasons, bits [127:0] of Z0..Z31
+  are always restored from the corresponding members of fpsimd_context.vregs[]
+  and not from sve_context.  The remaining bits are restored from sve_context.
+
+* Inclusion of fpsimd_context in the signal frame remains mandatory,
+  irrespective of whether sve_context is present or not.
+
+* The vector length cannot be changed via signal return.  If sve_context.vl in
+  the signal frame does not match the current vector length, the signal return
+  attempt is treated as illegal, resulting in a forced SIGSEGV.
+
+
+6.  prctl extensions
+--------------------
+
+Some new prctl() calls are added to allow programs to manage the SVE vector
+length:
+
+prctl(PR_SVE_SET_VL, unsigned long arg)
+
+    Sets the vector length of the calling thread and related flags, where
+    arg == vl | flags.  Other threads of the calling process are unaffected.
+
+    vl is the desired vector length, where sve_vl_valid(vl) must be true.
+
+    flags:
+
+	PR_SVE_VL_INHERIT
+
+	    Inherit the current vector length across execve().  Otherwise, the
+	    vector length is reset to the system default at execve().  (See
+	    Section 9.)
+
+	PR_SVE_SET_VL_ONEXEC
+
+	    Defer the requested vector length change until the next execve()
+	    performed by this thread.
+
+	    The effect is equivalent to implicit execution of the following
+	    call immediately after the next execve() (if any) by the thread:
+
+		prctl(PR_SVE_SET_VL, arg & ~PR_SVE_SET_VL_ONEXEC)
+
+	    This allows launching of a new program with a different vector
+	    length, while avoiding runtime side effects in the caller.
+
+
+	    Without PR_SVE_SET_VL_ONEXEC, the requested change takes effect
+	    immediately.
+
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SVE not supported, invalid vector length requested, or
+	    invalid flags.
+
+
+    On success:
+
+    * Either the calling thread's vector length or the deferred vector length
+      to be applied at the next execve() by the thread (dependent on whether
+      PR_SVE_SET_VL_ONEXEC is present in arg), is set to the largest value
+      supported by the system that is less than or equal to vl.  If vl ==
+      SVE_VL_MAX, the value set will be the largest value supported by the
+      system.
+
+    * Any previously outstanding deferred vector length change in the calling
+      thread is cancelled.
+
+    * The returned value describes the resulting configuration, encoded as for
+      PR_SVE_GET_VL.  The vector length reported in this value is the new
+      current vector length for this thread if PR_SVE_SET_VL_ONEXEC was not
+      present in arg; otherwise, the reported vector length is the deferred
+      vector length that will be applied at the next execve() by the calling
+      thread.
+
+    * Changing the vector length causes all of P0..P15, FFR and all bits of
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      unspecified.  Calling PR_SVE_SET_VL with vl equal to the thread's current
+      vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
+      flag, does not constitute a change to the vector length for this purpose.
+
+
+prctl(PR_SVE_GET_VL)
+
+    Gets the vector length of the calling thread.
+
+    The following flag may be OR-ed into the result:
+
+	PR_SVE_VL_INHERIT
+
+	    Vector length will be inherited across execve().
+
+    There is no way to determine whether there is an outstanding deferred
+    vector length change (which would only normally be the case between a
+    fork() or vfork() and the corresponding execve() in typical use).
+
+    To extract the vector length from the result, bitwise-AND it with
+    PR_SVE_VL_LEN_MASK.
+
+    Return value: a nonnegative value on success, or a negative value on error:
+	EINVAL: SVE not supported.
+
+
+7.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_SVE is defined for use with PTRACE_GETREGSET and
+  PTRACE_SETREGSET.
+
+  Refer to [2] for definitions.
+
+The regset data starts with struct user_sve_header, containing:
+
+    size
+
+	Size of the complete regset, in bytes.
+	This depends on vl and possibly on other things in the future.
+
+	If a call to PTRACE_GETREGSET requests less data than the value of
+	size, the caller can allocate a larger buffer and retry in order to
+	read the complete regset.
+
+    max_size
+
+	Maximum size in bytes that the regset can grow to for the target
+	thread.  The regset won't grow bigger than this even if the target
+	thread changes its vector length etc.
+
+    vl
+
+	Target thread's current vector length, in bytes.
+
+    max_vl
+
+	Maximum possible vector length for the target thread.
+
+    flags
+
+	either
+
+	    SVE_PT_REGS_FPSIMD
+
+		SVE registers are not live (GETREGSET) or are to be made
+		non-live (SETREGSET).
+
+		The payload is of type struct user_fpsimd_state, with the same
+		meaning as for NT_PRFPREG, starting at offset
+		SVE_PT_FPSIMD_OFFSET from the start of user_sve_header.
+
+		Extra data might be appended in the future: the size of the
+		payload should be obtained using SVE_PT_FPSIMD_SIZE(vq, flags).
+
+		vq should be obtained using sve_vq_from_vl(vl).
+
+		or
+
+	    SVE_PT_REGS_SVE
+
+		SVE registers are live (GETREGSET) or are to be made live
+		(SETREGSET).
+
+		The payload contains the SVE register data, starting at offset
+		SVE_PT_SVE_OFFSET from the start of user_sve_header, and with
+		size SVE_PT_SVE_SIZE(vq, flags);
+
+	... OR-ed with zero or more of the following flags, which have the same
+	meaning and behaviour as the corresponding PR_SVE_* flags:
+
+	    SVE_PT_VL_INHERIT
+
+	    SVE_PT_VL_ONEXEC (SETREGSET only).
+
+* The effects of changing the vector length and/or flags are equivalent to
+  those documented for PR_SVE_SET_VL.
+
+  The caller must make a further GETREGSET call if it needs to know what VL is
+  actually set by SETREGSET, unless it is known in advance that the requested
+  VL is supported.
+
+* In the SVE_PT_REGS_SVE case, the size and layout of the payload depends on
+  the header fields.  The SVE_PT_SVE_*() macros are provided to facilitate
+  access to the members.
+
+* In either case, for SETREGSET it is permissible to omit the payload, in which
+  case only the vector length and flags are changed (along with any
+  consequences of those changes).
+
+* For SETREGSET, if an SVE_PT_REGS_SVE payload is present and the
+  requested VL is not supported, the effect will be the same as if the
+  payload were omitted, except that an EIO error is reported.  No
+  attempt is made to translate the payload data to the correct layout
+  for the vector length actually set.  The thread's FPSIMD state is
+  preserved, but the remaining bits of the SVE registers become
+  unspecified.  It is up to the caller to translate the payload layout
+  for the actual VL and retry.
+
+* The effect of writing a partial, incomplete payload is unspecified.
+
+
+8.  ELF coredump extensions
+---------------------------
+
+* A NT_ARM_SVE note will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would have
+  been read if a PTRACE_GETREGSET of NT_ARM_SVE were executed for each thread
+  when the coredump was generated.
+
+
+9.  System runtime configuration
+--------------------------------
+
+* To mitigate the ABI impact of expansion of the signal frame, a policy
+  mechanism is provided for administrators, distro maintainers and developers
+  to set the default vector length for userspace processes:
+
+/proc/sys/abi/sve_default_vector_length
+
+    Writing the text representation of an integer to this file sets the system
+    default vector length to the specified value, unless the value is greater
+    than the maximum vector length supported by the system in which case the
+    default vector length is set to that maximum.
+
+    The result can be determined by reopening the file and reading its
+    contents.
+
+    At boot, the default vector length is initially set to 64 or the maximum
+    supported vector length, whichever is smaller.  This determines the initial
+    vector length of the init process (PID 1).
+
+    Reading this file returns the current system default vector length.
+
+* At every execve() call, the new vector length of the new process is set to
+  the system default vector length, unless
+
+    * PR_SVE_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
+      calling thread, or
+
+    * a deferred vector length change is pending, established via the
+      PR_SVE_SET_VL_ONEXEC flag (or SVE_PT_VL_ONEXEC).
+
+* Modifying the system default vector length does not affect the vector length
+  of any existing process or thread that does not make an execve() call.
+
+
+Appendix A.  SVE programmer's model (informative)
+=================================================
+
+This section provides a minimal description of the additions made by SVE to the
+ARMv8-A programmer's model that are relevant to this document.
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+A.1.  Registers
+---------------
+
+In A64 state, SVE adds the following:
+
+* 32 8VL-bit vector registers Z0..Z31
+  For each Zn, Zn bits [127:0] alias the ARMv8-A vector register Vn.
+
+  A register write using a Vn register name zeros all bits of the corresponding
+  Zn except for bits [127:0].
+
+* 16 VL-bit predicate registers P0..P15
+
+* 1 VL-bit special-purpose predicate register FFR (the "first-fault register")
+
+* a VL "pseudo-register" that determines the size of each vector register
+
+  The SVE instruction set architecture provides no way to write VL directly.
+  Instead, it can be modified only by EL1 and above, by writing appropriate
+  system registers.
+
+* The value of VL can be configured at runtime by EL1 and above:
+  16 <= VL <= VLmax, where VL must be a multiple of 16.
+
+* The maximum vector length is determined by the hardware:
+  16 <= VLmax <= 256.
+
+  (The SVE architecture specifies 256, but permits future architecture
+  revisions to raise this limit.)
+
+* FPSR and FPCR are retained from ARMv8-A, and interact with SVE floating-point
+  operations in a similar way to the way in which they interact with ARMv8
+  floating-point operations.
+
+         8VL-1                       128               0  bit index
+        +----          ////            -----------------+
+     Z0 |                               :       V0      |
+      :                                          :
+     Z7 |                               :       V7      |
+     Z8 |                               :     * V8      |
+      :                                       :  :
+    Z15 |                               :     *V15      |
+    Z16 |                               :      V16      |
+      :                                          :
+    Z31 |                               :      V31      |
+        +----          ////            -----------------+
+                                                 31    0
+         VL-1                  0                +-------+
+        +----       ////      --+          FPSR |       |
+     P0 |                       |               +-------+
+      : |                       |         *FPCR |       |
+    P15 |                       |               +-------+
+        +----       ////      --+
+    FFR |                       |               +-----+
+        +----       ////      --+            VL |     |
+                                                +-----+
+
+(*) callee-save:
+    This only applies to bits [63:0] of Z-/V-registers.
+    FPCR contains callee-save and caller-save bits.  See [4] for details.
+
+
+A.2.  Procedure call standard
+-----------------------------
+
+The ARMv8-A base procedure call standard is extended as follows with respect to
+the additional SVE register state:
+
+* All SVE register bits that are not shared with FP/SIMD are caller-save.
+
+* Z8 bits [63:0] .. Z15 bits [63:0] are callee-save.
+
+  This follows from the way these bits are mapped to V8..V15, which are callee-
+  save in the base procedure call standard.
+
+
+Appendix B.  ARMv8-A FP/SIMD programmer's model
+===============================================
+
+Note: This section is for information only and not intended to be complete or
+to replace any architectural specification.
+
+Refer to [4] for more information.
+
+ARMv8-A defines the following floating-point / SIMD register state:
+
+* 32 128-bit vector registers V0..V31
+* 2 32-bit status/control registers FPSR, FPCR
+
+         127           0  bit index
+        +---------------+
+     V0 |               |
+      : :               :
+     V7 |               |
+   * V8 |               |
+   :  : :               :
+   *V15 |               |
+    V16 |               |
+      : :               :
+    V31 |               |
+        +---------------+
+
+                 31    0
+                +-------+
+           FPSR |       |
+                +-------+
+          *FPCR |       |
+                +-------+
+
+(*) callee-save:
+    This only applies to bits [63:0] of V-registers.
+    FPCR contains a mixture of callee-save and caller-save bits.
+
+
+References
+==========
+
+[1] arch/arm64/include/uapi/asm/sigcontext.h
+    AArch64 Linux signal ABI definitions
+
+[2] arch/arm64/include/uapi/asm/ptrace.h
+    AArch64 Linux ptrace ABI definitions
+
+[3] linux/Documentation/arm64/cpu-feature-registers.txt
+
+[4] ARM IHI0055C
+    http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
+    http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
+    Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
diff --git a/Documentation/devicetree/bindings/arm/spe-pmu.txt b/Documentation/devicetree/bindings/arm/spe-pmu.txt
new file mode 100644
index 000000000000..93372f2a7df9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spe-pmu.txt
@@ -0,0 +1,20 @@
+* ARMv8.2 Statistical Profiling Extension (SPE) Performance Monitor Units (PMU)
+
+ARMv8.2 introduces the optional Statistical Profiling Extension for collecting
+performance sample data using an in-memory trace buffer.
+
+** SPE Required properties:
+
+- compatible : should be one of:
+	       "arm,statistical-profiling-extension-v1"
+
+- interrupts : Exactly 1 PPI must be listed. For heterogeneous systems where
+               SPE is only supported on a subset of the CPUs, please consult
+	       the arm,gic-v3 binding for details on describing a PPI partition.
+
+** Example:
+
+spe-pmu {
+        compatible = "arm,statistical-profiling-extension-v1";
+        interrupts = <GIC_PPI 05 IRQ_TYPE_LEVEL_HIGH &part1>;
+};
diff --git a/Documentation/perf/hisi-pmu.txt b/Documentation/perf/hisi-pmu.txt
new file mode 100644
index 000000000000..267a028b2741
--- /dev/null
+++ b/Documentation/perf/hisi-pmu.txt
@@ -0,0 +1,53 @@
+HiSilicon SoC uncore Performance Monitoring Unit (PMU)
+======================================================
+The HiSilicon SoC chip includes various independent system device PMUs
+such as L3 cache (L3C), Hydra Home Agent (HHA) and DDRC. These PMUs are
+independent and have hardware logic to gather statistics and performance
+information.
+
+The HiSilicon SoC encapsulates multiple CPU and IO dies. Each CPU cluster
+(CCL) is made up of 4 cpu cores sharing one L3 cache; each CPU die is
+called Super CPU cluster (SCCL) and is made up of 6 CCLs. Each SCCL has
+two HHAs (0 - 1) and four DDRCs (0 - 3), respectively.
+
+HiSilicon SoC uncore PMU driver
+---------------------------------------
+Each device PMU has separate registers for event counting, control and
+interrupt, and the PMU driver shall register perf PMU drivers like L3C,
+HHA and DDRC etc. The available events and configuration options shall
+be described in the sysfs, see :
+/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
+The "perf list" command shall list the available events from sysfs.
+
+Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
+name will appear in event listing as hisi_sccl<sccl-id>_module<index-id>,
+where "sccl-id" is the identifier of the SCCL and "index-id" is the index of
+module.
+e.g. hisi_sccl3_l3c0/rd_hit_cpipe is READ_HIT_CPIPE event of L3C index #0 in
+SCCL ID #3.
+e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
+SCCL ID #1.
+
+The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
+ID used to count the uncore PMU event.
+
+Example usage of perf:
+$# perf list
+hisi_sccl3_l3c0/rd_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl3_l3c0/wr_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl1_l3c0/rd_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+hisi_sccl1_l3c0/wr_hit_cpipe/ [kernel PMU event]
+------------------------------------------
+
+$# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
+$# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
+
+The current driver does not support sampling. So "perf record" is unsupported.
+Attaching to a task is also unsupported, as the events are all uncore.
+
+Note: Please contact the maintainer for a complete list of events supported for
+the PMU devices in the SoC and its information if needed.
diff --git a/MAINTAINERS b/MAINTAINERS
index 82ed85135971..7e9c887ad951 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6259,6 +6259,13 @@ S:	Maintained
 F:	drivers/net/ethernet/hisilicon/
 F:	Documentation/devicetree/bindings/net/hisilicon*.txt
 
+HISILICON PMU DRIVER
+M:	Shaokun Zhang <zhangshaokun@hisilicon.com>
+W:	http://www.hisilicon.com
+S:	Supported
+F:	drivers/perf/hisilicon
+F:	Documentation/perf/hisi-pmu.txt
+
 HISILICON ROCE DRIVER
 M:	Lijun Ou <oulijun@huawei.com>
 M:	Wei Hu(Xavier) <xavier.huwei@huawei.com>
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 9327e3a101dc..0a8d7bba2cb0 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -107,6 +107,7 @@ static inline u32 arch_timer_get_cntkctl(void)
 static inline void arch_timer_set_cntkctl(u32 cntkctl)
 {
 	asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
+	isb();
 }
 
 #endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4a879f6ff13b..242151ea6908 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -293,4 +293,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
+/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
+static inline void kvm_fpsimd_flush_cpu_state(void) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b907bf4403b5..ba6aab55d464 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,7 +21,7 @@ config ARM64
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK if !PREEMPT
 	select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
 	select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
@@ -115,7 +115,7 @@ config ARM64
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP if NUMA
-	select HAVE_NMI if ACPI_APEI_SEA
+	select HAVE_NMI
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -136,6 +136,7 @@ config ARM64
 	select PCI_ECAM if ACPI
 	select POWER_RESET
 	select POWER_SUPPLY
+	select REFCOUNT_FULL
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
@@ -842,6 +843,7 @@ config FORCE_MAX_ZONEORDER
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on COMPAT
+	depends on SYSCTL
 	help
 	  Legacy software support may require certain instructions
 	  that have been deprecated or obsoleted in the architecture.
@@ -1011,6 +1013,17 @@ config ARM64_PMEM
 
 endmenu
 
+config ARM64_SVE
+	bool "ARM Scalable Vector Extension support"
+	default y
+	help
+	  The Scalable Vector Extension (SVE) is an extension to the AArch64
+	  execution state which complements and extends the SIMD functionality
+	  of the base architecture to support much larger vectors and to enable
+	  additional vectorisation opportunities.
+
+	  To enable use of this extension on CPUs that implement it, say Y.
+
 config ARM64_MODULE_CMODEL_LARGE
 	bool
 
@@ -1099,6 +1112,7 @@ config EFI_STUB
 config EFI
 	bool "UEFI runtime support"
 	depends on OF && !CPU_BIG_ENDIAN
+	depends on KERNEL_MODE_NEON
 	select LIBFDT
 	select UCS2_STRING
 	select EFI_PARAMS_FROM_FDT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 939b310913cf..b35788c909f1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -14,8 +14,12 @@ LDFLAGS_vmlinux	:=-p --no-undefined -X
 CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 GZFLAGS		:=-9
 
-ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux		+= -pie -shared -Bsymbolic
+ifeq ($(CONFIG_RELOCATABLE), y)
+# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+# for relative relocs, since this leads to better Image compression
+# with the relocation offsets always being zero.
+LDFLAGS_vmlinux		+= -pie -shared -Bsymbolic \
+			$(call ld-option, --no-apply-dynamic-relocs)
 endif
 
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
@@ -53,6 +57,8 @@ KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
 
+KBUILD_CFLAGS	+= $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index a652ce0a5cb2..bdedd8f748d1 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -144,6 +144,7 @@ static inline u32 arch_timer_get_cntkctl(void)
 static inline void arch_timer_set_cntkctl(u32 cntkctl)
 {
 	write_sysreg(cntkctl, cntkctl_el1);
+	isb();
 }
 
 static inline u64 arch_counter_get_cntpct(void)
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index 636e755bcdca..b3552c4a405f 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -22,10 +22,10 @@
 #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
 #define __BUGVERBOSE_LOCATION(file, line)			\
 		.pushsection .rodata.str,"aMS",@progbits,1;	\
-	2:	.string file;					\
+	14472:	.string file;					\
 		.popsection;					\
 								\
-		.long 2b - 0b;					\
+		.long 14472b - 14470b;				\
 		.short line;
 #else
 #define _BUGVERBOSE_LOCATION(file, line)
@@ -36,11 +36,11 @@
 #define __BUG_ENTRY(flags) 				\
 		.pushsection __bug_table,"aw";		\
 		.align 2;				\
-	0:	.long 1f - 0b;				\
+	14470:	.long 14471f - 14470b;			\
 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
 		.short flags; 				\
 		.popsection;				\
-	1:
+	14471:
 #else
 #define __BUG_ENTRY(flags)
 #endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d58a6253c6ab..aef72d886677 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,12 +25,41 @@
 
 #include <asm/asm-offsets.h>
 #include <asm/cpufeature.h>
+#include <asm/debug-monitors.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
+	.macro save_and_disable_daif, flags
+	mrs	\flags, daif
+	msr	daifset, #0xf
+	.endm
+
+	.macro disable_daif
+	msr	daifset, #0xf
+	.endm
+
+	.macro enable_daif
+	msr	daifclr, #0xf
+	.endm
+
+	.macro	restore_daif, flags:req
+	msr	daif, \flags
+	.endm
+
+	/* Only for use on an AArch64 pstate; PSR_D_BIT is different for AArch32 */
+	.macro	inherit_daif, pstate:req, tmp:req
+	and	\tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+	msr	daif, \tmp
+	.endm
+
+	/* IRQ is the lowest priority flag; unconditionally unmask the rest. */
+	.macro enable_da_f
+	msr	daifclr, #(8 | 4 | 1)
+	.endm
+
 /*
  * Enable and disable interrupts.
  */
@@ -51,13 +80,6 @@
 	msr	daif, \flags
 	.endm
 
-/*
- * Enable and disable debug exceptions.
- */
-	.macro	disable_dbg
-	msr	daifset, #8
-	.endm
-
 	.macro	enable_dbg
 	msr	daifclr, #8
 	.endm
@@ -65,31 +87,22 @@
 	.macro	disable_step_tsk, flgs, tmp
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
 	mrs	\tmp, mdscr_el1
-	bic	\tmp, \tmp, #1
+	bic	\tmp, \tmp, #DBG_MDSCR_SS
 	msr	mdscr_el1, \tmp
 	isb	// Synchronise with enable_dbg
 9990:
 	.endm
 
+	/* call with daif masked */
 	.macro	enable_step_tsk, flgs, tmp
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
-	disable_dbg
 	mrs	\tmp, mdscr_el1
-	orr	\tmp, \tmp, #1
+	orr	\tmp, \tmp, #DBG_MDSCR_SS
 	msr	mdscr_el1, \tmp
 9990:
 	.endm
 
 /*
- * Enable both debug exceptions and interrupts. This is likely to be
- * faster than two daifclr operations, since writes to this register
- * are self-synchronising.
- */
-	.macro	enable_dbg_and_irq
-	msr	daifclr, #(8 | 2)
-	.endm
-
-/*
  * SMP data memory barrier
  */
 	.macro	smp_dmb, opt
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fe7e43b7fbc..77651c49ef44 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -31,6 +31,8 @@
 #define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
 #define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
 
+#define psb_csync()	asm volatile("hint #17" : : : "memory")
+
 #define mb()		dsb(sy)
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 889226b4c6e1..88392272250e 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -41,6 +41,7 @@ struct cpuinfo_arm64 {
 	u64		reg_id_aa64mmfr2;
 	u64		reg_id_aa64pfr0;
 	u64		reg_id_aa64pfr1;
+	u64		reg_id_aa64zfr0;
 
 	u32		reg_id_dfr0;
 	u32		reg_id_isar0;
@@ -59,6 +60,9 @@ struct cpuinfo_arm64 {
 	u32		reg_mvfr0;
 	u32		reg_mvfr1;
 	u32		reg_mvfr2;
+
+	/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
+	u64		reg_zcr;
 };
 
 DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8da621627d7c..2ff7c5e8efab 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -40,7 +40,8 @@
 #define ARM64_WORKAROUND_858921			19
 #define ARM64_WORKAROUND_CAVIUM_30115		20
 #define ARM64_HAS_DCPOP				21
+#define ARM64_SVE				22
 
-#define ARM64_NCAPS				22
+#define ARM64_NCAPS				23
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 428ee1f2468c..ac67cfc2585a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,7 +10,9 @@
 #define __ASM_CPUFEATURE_H
 
 #include <asm/cpucaps.h>
+#include <asm/fpsimd.h>
 #include <asm/hwcap.h>
+#include <asm/sigcontext.h>
 #include <asm/sysreg.h>
 
 /*
@@ -223,6 +225,13 @@ static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
 	return val == ID_AA64PFR0_EL0_32BIT_64BIT;
 }
 
+static inline bool id_aa64pfr0_sve(u64 pfr0)
+{
+	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+
+	return val > 0;
+}
+
 void __init setup_cpu_features(void);
 
 void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
@@ -262,6 +271,39 @@ static inline bool system_uses_ttbr0_pan(void)
 		!cpus_have_const_cap(ARM64_HAS_PAN);
 }
 
+static inline bool system_supports_sve(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_SVE) &&
+		cpus_have_const_cap(ARM64_SVE);
+}
+
+/*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+static inline u64 read_zcr_features(void)
+{
+	u64 zcr;
+	unsigned int vq_max;
+
+	/*
+	 * Set the maximum possible VL, and write zeroes to all other
+	 * bits to see if they stick.
+	 */
+	sve_kernel_enable(NULL);
+	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+	zcr = read_sysreg_s(SYS_ZCR_EL1);
+	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+	vq_max = sve_vq_from_vl(sve_get_vl());
+	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+	return zcr;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
new file mode 100644
index 000000000000..22e4c83de5a5
--- /dev/null
+++ b/arch/arm64/include/asm/daifflags.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_DAIFFLAGS_H
+#define __ASM_DAIFFLAGS_H
+
+#include <linux/irqflags.h>
+
+#define DAIF_PROCCTX		0
+#define DAIF_PROCCTX_NOIRQ	PSR_I_BIT
+
+/* mask/save/unmask/restore all exceptions, including interrupts. */
+static inline void local_daif_mask(void)
+{
+	asm volatile(
+		"msr	daifset, #0xf		// local_daif_mask\n"
+		:
+		:
+		: "memory");
+	trace_hardirqs_off();
+}
+
+static inline unsigned long local_daif_save(void)
+{
+	unsigned long flags;
+
+	asm volatile(
+		"mrs	%0, daif		// local_daif_save\n"
+		: "=r" (flags)
+		:
+		: "memory");
+	local_daif_mask();
+
+	return flags;
+}
+
+static inline void local_daif_unmask(void)
+{
+	trace_hardirqs_on();
+	asm volatile(
+		"msr	daifclr, #0xf		// local_daif_unmask"
+		:
+		:
+		: "memory");
+}
+
+static inline void local_daif_restore(unsigned long flags)
+{
+	if (!arch_irqs_disabled_flags(flags))
+		trace_hardirqs_on();
+	asm volatile(
+		"msr	daif, %0		// local_daif_restore"
+		:
+		: "r" (flags)
+		: "memory");
+	if (arch_irqs_disabled_flags(flags))
+		trace_hardirqs_off();
+}
+
+#endif
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 33be513ef24c..fac1c4de7898 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -188,8 +188,8 @@ typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
 
 #define compat_start_thread		compat_start_thread
 /*
- * Unlike the native SET_PERSONALITY macro, the compat version inherits
- * READ_IMPLIES_EXEC across a fork() since this is the behaviour on
+ * Unlike the native SET_PERSONALITY macro, the compat version maintains
+ * READ_IMPLIES_EXEC across an execve() since this is the behaviour on
  * arch/arm/.
  */
 #define COMPAT_SET_PERSONALITY(ex)					\
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 66ed8b6b9976..014d7d8edcf9 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -43,7 +43,8 @@
 #define ESR_ELx_EC_HVC64	(0x16)
 #define ESR_ELx_EC_SMC64	(0x17)
 #define ESR_ELx_EC_SYS64	(0x18)
-/* Unallocated EC: 0x19 - 0x1E */
+#define ESR_ELx_EC_SVE		(0x19)
+/* Unallocated EC: 0x1A - 0x1E */
 #define ESR_ELx_EC_IMP_DEF	(0x1f)
 #define ESR_ELx_EC_IABT_LOW	(0x20)
 #define ESR_ELx_EC_IABT_CUR	(0x21)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 410c48163c6a..74f34392a531 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -17,9 +17,13 @@
 #define __ASM_FP_H
 
 #include <asm/ptrace.h>
+#include <asm/errno.h>
 
 #ifndef __ASSEMBLY__
 
+#include <linux/cache.h>
+#include <linux/stddef.h>
+
 /*
  * FP/SIMD storage area has:
  *  - FPSR and FPCR
@@ -35,13 +39,16 @@ struct fpsimd_state {
 			__uint128_t vregs[32];
 			u32 fpsr;
 			u32 fpcr;
+			/*
+			 * For ptrace compatibility, pad to next 128-bit
+			 * boundary here if extending this struct.
+			 */
 		};
 	};
 	/* the id of the last cpu to have restored this state */
 	unsigned int cpu;
 };
 
-
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -61,11 +68,73 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
+extern void fpsimd_signal_preserve_current_state(void);
 extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void sve_flush_cpu_state(void);
+
+/* Maximum VL that SVE VL-agnostic software can transparently support */
+#define SVE_VL_ARCH_MAX 0x100
+
+extern void sve_save_state(void *state, u32 *pfpsr);
+extern void sve_load_state(void const *state, u32 const *pfpsr,
+			   unsigned long vq_minus_1);
+extern unsigned int sve_get_vl(void);
+extern int sve_kernel_enable(void *);
+
+extern int __ro_after_init sve_max_vl;
+
+#ifdef CONFIG_ARM64_SVE
+
+extern size_t sve_state_size(struct task_struct const *task);
+
+extern void sve_alloc(struct task_struct *task);
+extern void fpsimd_release_task(struct task_struct *task);
+extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void sve_sync_to_fpsimd(struct task_struct *task);
+extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+
+extern int sve_set_vector_length(struct task_struct *task,
+				 unsigned long vl, unsigned long flags);
+
+extern int sve_set_current_vl(unsigned long arg);
+extern int sve_get_current_vl(void);
+
+/*
+ * Probing and setup functions.
+ * Calls to these functions must be serialised with one another.
+ */
+extern void __init sve_init_vq_map(void);
+extern void sve_update_vq_map(void);
+extern int sve_verify_vq_map(void);
+extern void __init sve_setup(void);
+
+#else /* ! CONFIG_ARM64_SVE */
+
+static inline void sve_alloc(struct task_struct *task) { }
+static inline void fpsimd_release_task(struct task_struct *task) { }
+static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
+static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+
+static inline int sve_set_current_vl(unsigned long arg)
+{
+	return -EINVAL;
+}
+
+static inline int sve_get_current_vl(void)
+{
+	return -EINVAL;
+}
+
+static inline void sve_init_vq_map(void) { }
+static inline void sve_update_vq_map(void) { }
+static inline int sve_verify_vq_map(void) { return 0; }
+static inline void sve_setup(void) { }
+
+#endif /* ! CONFIG_ARM64_SVE */
 
 /* For use by EFI runtime services calls only */
 extern void __efi_fpsimd_begin(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 0f5fdd388b0d..e050d765ca9e 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,3 +75,151 @@
 	ldr	w\tmpnr, [\state, #16 * 2 + 4]
 	fpsimd_restore_fpcr x\tmpnr, \state
 .endm
+
+/* Sanity-check macros to help avoid encoding garbage instructions */
+
+.macro _check_general_reg nr
+	.if (\nr) < 0 || (\nr) > 30
+		.error "Bad register number \nr."
+	.endif
+.endm
+
+.macro _sve_check_zreg znr
+	.if (\znr) < 0 || (\znr) > 31
+		.error "Bad Scalable Vector Extension vector register number \znr."
+	.endif
+.endm
+
+.macro _sve_check_preg pnr
+	.if (\pnr) < 0 || (\pnr) > 15
+		.error "Bad Scalable Vector Extension predicate register number \pnr."
+	.endif
+.endm
+
+.macro _check_num n, min, max
+	.if (\n) < (\min) || (\n) > (\max)
+		.error "Number \n out of range [\min,\max]"
+	.endif
+.endm
+
+/* SVE instruction encodings for non-SVE-capable assemblers */
+
+/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_v nz, nxbase, offset=0
+	_sve_check_zreg \nz
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0xe5804000			\
+		| (\nz)				\
+		| ((\nxbase) << 5)		\
+		| (((\offset) & 7) << 10)	\
+		| (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_v nz, nxbase, offset=0
+	_sve_check_zreg \nz
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0x85804000			\
+		| (\nz)				\
+		| ((\nxbase) << 5)		\
+		| (((\offset) & 7) << 10)	\
+		| (((\offset) & 0x1f8) << 13)
+.endm
+
+/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_p np, nxbase, offset=0
+	_sve_check_preg \np
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0xe5800000			\
+		| (\np)				\
+		| ((\nxbase) << 5)		\
+		| (((\offset) & 7) << 10)	\
+		| (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_p np, nxbase, offset=0
+	_sve_check_preg \np
+	_check_general_reg \nxbase
+	_check_num (\offset), -0x100, 0xff
+	.inst	0x85800000			\
+		| (\np)				\
+		| ((\nxbase) << 5)		\
+		| (((\offset) & 7) << 10)	\
+		| (((\offset) & 0x1f8) << 13)
+.endm
+
+/* RDVL X\nx, #\imm */
+.macro _sve_rdvl nx, imm
+	_check_general_reg \nx
+	_check_num (\imm), -0x20, 0x1f
+	.inst	0x04bf5000			\
+		| (\nx)				\
+		| (((\imm) & 0x3f) << 5)
+.endm
+
+/* RDFFR (unpredicated): RDFFR P\np.B */
+.macro _sve_rdffr np
+	_sve_check_preg \np
+	.inst	0x2519f000			\
+		| (\np)
+.endm
+
+/* WRFFR P\np.B */
+.macro _sve_wrffr np
+	_sve_check_preg \np
+	.inst	0x25289000			\
+		| ((\np) << 5)
+.endm
+
+.macro __for from:req, to:req
+	.if (\from) == (\to)
+		_for__body \from
+	.else
+		__for \from, (\from) + ((\to) - (\from)) / 2
+		__for (\from) + ((\to) - (\from)) / 2 + 1, \to
+	.endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+	.macro _for__body \var:req
+		\insn
+	.endm
+
+	__for \from, \to
+
+	.purgem _for__body
+.endm
+
+.macro sve_save nxbase, xpfpsr, nxtmp
+ _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
+ _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
+		_sve_rdffr	0
+		_sve_str_p	0, \nxbase
+		_sve_ldr_p	0, \nxbase, -16
+
+		mrs		x\nxtmp, fpsr
+		str		w\nxtmp, [\xpfpsr]
+		mrs		x\nxtmp, fpcr
+		str		w\nxtmp, [\xpfpsr, #4]
+.endm
+
+.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp
+		mrs_s		x\nxtmp, SYS_ZCR_EL1
+		bic		x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK
+		orr		x\nxtmp, x\nxtmp, \xvqminus1
+		msr_s		SYS_ZCR_EL1, x\nxtmp	// self-synchronising
+
+ _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
+		_sve_ldr_p	0, \nxbase
+		_sve_wrffr	0
+ _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16
+
+		ldr		w\nxtmp, [\xpfpsr]
+		msr		fpsr, x\nxtmp
+		ldr		w\nxtmp, [\xpfpsr, #4]
+		msr		fpcr, x\nxtmp
+.endm
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 8c581281fa12..24692edf1a69 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,6 +21,19 @@
 #include <asm/ptrace.h>
 
 /*
+ * AArch64 has flags for masking: Debug, Asynchronous (SError), Interrupts and
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * order:
+ * Masking debug exceptions causes all other exceptions to be masked too.
+ * Masking SError masks irq, but not debug exceptions. Masking irqs has no
+ * side effects for other flags. Keeping to this order makes it easier for
+ * entry.S to know which exceptions should be unmasked.
+ *
+ * FIQ is never expected, but we mask it when we disable debug exceptions, and
+ * unmask it at all other times.
+ */
+
+/*
  * CPU interrupt mask handling.
  */
 static inline unsigned long arch_local_irq_save(void)
@@ -53,12 +66,6 @@ static inline void arch_local_irq_disable(void)
 		: "memory");
 }
 
-#define local_fiq_enable()	asm("msr	daifclr, #1" : : : "memory")
-#define local_fiq_disable()	asm("msr	daifset, #1" : : : "memory")
-
-#define local_async_enable()	asm("msr	daifclr, #4" : : : "memory")
-#define local_async_disable()	asm("msr	daifset, #4" : : : "memory")
-
 /*
  * Save the current interrupt enable state.
  */
@@ -89,26 +96,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
 	return flags & PSR_I_BIT;
 }
-
-/*
- * save and restore debug state
- */
-#define local_dbg_save(flags)						\
-	do {								\
-		typecheck(unsigned long, flags);			\
-		asm volatile(						\
-		"mrs    %0, daif		// local_dbg_save\n"	\
-		"msr    daifset, #8"					\
-		: "=r" (flags) : : "memory");				\
-	} while (0)
-
-#define local_dbg_restore(flags)					\
-	do {								\
-		typecheck(unsigned long, flags);			\
-		asm volatile(						\
-		"msr    daif, %0		// local_dbg_restore\n"	\
-		: : "r" (flags) : "memory");				\
-	} while (0)
-
 #endif
 #endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 61d694c2eae5..7f069ff37f06 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -185,7 +185,9 @@
 #define CPTR_EL2_TCPAC	(1 << 31)
 #define CPTR_EL2_TTA	(1 << 20)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
-#define CPTR_EL2_DEFAULT	0x000033ff
+#define CPTR_EL2_TZ	(1 << 8)
+#define CPTR_EL2_RES1	0x000032ff /* known RES1 bits in CPTR_EL2 */
+#define CPTR_EL2_DEFAULT	CPTR_EL2_RES1
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TPMS		(1 << 14)
@@ -236,5 +238,6 @@
 
 #define CPACR_EL1_FPEN		(3 << 20)
 #define CPACR_EL1_TTA		(1 << 28)
+#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
 
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e923b58606e2..674912d7a571 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -384,4 +385,14 @@ static inline void __cpu_init_stage2(void)
 		  "PARange is %d bits, unsupported configuration!", parange);
 }
 
+/*
+ * All host FP/SIMD state is restored on guest exit, so nothing needs
+ * doing here except in the SVE case:
+ */
+static inline void kvm_fpsimd_flush_cpu_state(void)
+{
+	if (system_supports_sve())
+		sve_flush_cpu_state();
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f7c4d2146aed..d4bae7d6e0d8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -61,8 +61,6 @@
  * KIMAGE_VADDR - the virtual address of the start of the kernel image
  * VA_BITS - the maximum number of bits for virtual addresses.
  * VA_START - the first kernel virtual address.
- * TASK_SIZE - the maximum size of a user space task.
- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) - \
@@ -77,19 +75,6 @@
 #define PCI_IO_END		(VMEMMAP_START - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
-#define TASK_SIZE_64		(UL(1) << VA_BITS)
-
-#ifdef CONFIG_COMPAT
-#define TASK_SIZE_32		UL(0x100000000)
-#define TASK_SIZE		(test_thread_flag(TIF_32BIT) ? \
-				TASK_SIZE_32 : TASK_SIZE_64)
-#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_32BIT) ? \
-				TASK_SIZE_32 : TASK_SIZE_64)
-#else
-#define TASK_SIZE		TASK_SIZE_64
-#endif /* CONFIG_COMPAT */
-
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
 #define KERNEL_START      _text
 #define KERNEL_END        _end
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b46e54c2399b..c9530b5b5ca8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -98,6 +98,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
 #define pte_valid_young(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+#define pte_valid_user(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 
 /*
  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
@@ -107,6 +109,18 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_accessible(mm, pte)	\
 	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
 
+/*
+ * p??_access_permitted() is true for valid user mappings (subject to the
+ * write permission check) other than user execute-only which do not have the
+ * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set.
+ */
+#define pte_access_permitted(pte, write) \
+	(pte_valid_user(pte) && (!(write) || pte_write(pte)))
+#define pmd_access_permitted(pmd, write) \
+	(pte_access_permitted(pmd_pte(pmd), (write)))
+#define pud_access_permitted(pud, write) \
+	(pte_access_permitted(pud_pte(pud), (write)))
+
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {
 	pte_val(pte) &= ~pgprot_val(prot);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 29adab8138c3..023cacb946c3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,6 +19,10 @@
 #ifndef __ASM_PROCESSOR_H
 #define __ASM_PROCESSOR_H
 
+#define TASK_SIZE_64		(UL(1) << VA_BITS)
+
+#ifndef __ASSEMBLY__
+
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -37,6 +41,22 @@
 #include <asm/ptrace.h>
 #include <asm/types.h>
 
+/*
+ * TASK_SIZE - the maximum size of a user space task.
+ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
+ */
+#ifdef CONFIG_COMPAT
+#define TASK_SIZE_32		UL(0x100000000)
+#define TASK_SIZE		(test_thread_flag(TIF_32BIT) ? \
+				TASK_SIZE_32 : TASK_SIZE_64)
+#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_32BIT) ? \
+				TASK_SIZE_32 : TASK_SIZE_64)
+#else
+#define TASK_SIZE		TASK_SIZE_64
+#endif /* CONFIG_COMPAT */
+
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
+
 #define STACK_TOP_MAX		TASK_SIZE_64
 #ifdef CONFIG_COMPAT
 #define AARCH32_VECTORS_BASE	0xffff0000
@@ -85,6 +105,9 @@ struct thread_struct {
 	unsigned long		tp2_value;
 #endif
 	struct fpsimd_state	fpsimd_state;
+	void			*sve_state;	/* SVE registers, if any */
+	unsigned int		sve_vl;		/* SVE vector length */
+	unsigned int		sve_vl_onexec;	/* SVE vl after next exec */
 	unsigned long		fault_address;	/* fault info */
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
@@ -194,4 +217,9 @@ static inline void spin_lock_prefetch(const void *ptr)
 int cpu_enable_pan(void *__unused);
 int cpu_enable_cache_maint_trap(void *__unused);
 
+/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+#define SVE_SET_VL(arg)	sve_set_current_vl(arg)
+#define SVE_GET_VL()	sve_get_current_vl()
+
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f707fed5886f..08cc88574659 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -145,10 +145,14 @@
 
 #define SYS_ID_AA64PFR0_EL1		sys_reg(3, 0, 0, 4, 0)
 #define SYS_ID_AA64PFR1_EL1		sys_reg(3, 0, 0, 4, 1)
+#define SYS_ID_AA64ZFR0_EL1		sys_reg(3, 0, 0, 4, 4)
 
 #define SYS_ID_AA64DFR0_EL1		sys_reg(3, 0, 0, 5, 0)
 #define SYS_ID_AA64DFR1_EL1		sys_reg(3, 0, 0, 5, 1)
 
+#define SYS_ID_AA64AFR0_EL1		sys_reg(3, 0, 0, 5, 4)
+#define SYS_ID_AA64AFR1_EL1		sys_reg(3, 0, 0, 5, 5)
+
 #define SYS_ID_AA64ISAR0_EL1		sys_reg(3, 0, 0, 6, 0)
 #define SYS_ID_AA64ISAR1_EL1		sys_reg(3, 0, 0, 6, 1)
 
@@ -160,6 +164,8 @@
 #define SYS_ACTLR_EL1			sys_reg(3, 0, 1, 0, 1)
 #define SYS_CPACR_EL1			sys_reg(3, 0, 1, 0, 2)
 
+#define SYS_ZCR_EL1			sys_reg(3, 0, 1, 2, 0)
+
 #define SYS_TTBR0_EL1			sys_reg(3, 0, 2, 0, 0)
 #define SYS_TTBR1_EL1			sys_reg(3, 0, 2, 0, 1)
 #define SYS_TCR_EL1			sys_reg(3, 0, 2, 0, 2)
@@ -172,6 +178,99 @@
 #define SYS_FAR_EL1			sys_reg(3, 0, 6, 0, 0)
 #define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0)
 
+/*** Statistical Profiling Extension ***/
+/* ID registers */
+#define SYS_PMSIDR_EL1			sys_reg(3, 0, 9, 9, 7)
+#define SYS_PMSIDR_EL1_FE_SHIFT		0
+#define SYS_PMSIDR_EL1_FT_SHIFT		1
+#define SYS_PMSIDR_EL1_FL_SHIFT		2
+#define SYS_PMSIDR_EL1_ARCHINST_SHIFT	3
+#define SYS_PMSIDR_EL1_LDS_SHIFT	4
+#define SYS_PMSIDR_EL1_ERND_SHIFT	5
+#define SYS_PMSIDR_EL1_INTERVAL_SHIFT	8
+#define SYS_PMSIDR_EL1_INTERVAL_MASK	0xfUL
+#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT	12
+#define SYS_PMSIDR_EL1_MAXSIZE_MASK	0xfUL
+#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT	16
+#define SYS_PMSIDR_EL1_COUNTSIZE_MASK	0xfUL
+
+#define SYS_PMBIDR_EL1			sys_reg(3, 0, 9, 10, 7)
+#define SYS_PMBIDR_EL1_ALIGN_SHIFT	0
+#define SYS_PMBIDR_EL1_ALIGN_MASK	0xfU
+#define SYS_PMBIDR_EL1_P_SHIFT		4
+#define SYS_PMBIDR_EL1_F_SHIFT		5
+
+/* Sampling controls */
+#define SYS_PMSCR_EL1			sys_reg(3, 0, 9, 9, 0)
+#define SYS_PMSCR_EL1_E0SPE_SHIFT	0
+#define SYS_PMSCR_EL1_E1SPE_SHIFT	1
+#define SYS_PMSCR_EL1_CX_SHIFT		3
+#define SYS_PMSCR_EL1_PA_SHIFT		4
+#define SYS_PMSCR_EL1_TS_SHIFT		5
+#define SYS_PMSCR_EL1_PCT_SHIFT		6
+
+#define SYS_PMSCR_EL2			sys_reg(3, 4, 9, 9, 0)
+#define SYS_PMSCR_EL2_E0HSPE_SHIFT	0
+#define SYS_PMSCR_EL2_E2SPE_SHIFT	1
+#define SYS_PMSCR_EL2_CX_SHIFT		3
+#define SYS_PMSCR_EL2_PA_SHIFT		4
+#define SYS_PMSCR_EL2_TS_SHIFT		5
+#define SYS_PMSCR_EL2_PCT_SHIFT		6
+
+#define SYS_PMSICR_EL1			sys_reg(3, 0, 9, 9, 2)
+
+#define SYS_PMSIRR_EL1			sys_reg(3, 0, 9, 9, 3)
+#define SYS_PMSIRR_EL1_RND_SHIFT	0
+#define SYS_PMSIRR_EL1_INTERVAL_SHIFT	8
+#define SYS_PMSIRR_EL1_INTERVAL_MASK	0xffffffUL
+
+/* Filtering controls */
+#define SYS_PMSFCR_EL1			sys_reg(3, 0, 9, 9, 4)
+#define SYS_PMSFCR_EL1_FE_SHIFT		0
+#define SYS_PMSFCR_EL1_FT_SHIFT		1
+#define SYS_PMSFCR_EL1_FL_SHIFT		2
+#define SYS_PMSFCR_EL1_B_SHIFT		16
+#define SYS_PMSFCR_EL1_LD_SHIFT		17
+#define SYS_PMSFCR_EL1_ST_SHIFT		18
+
+#define SYS_PMSEVFR_EL1			sys_reg(3, 0, 9, 9, 5)
+#define SYS_PMSEVFR_EL1_RES0		0x0000ffff00ff0f55UL
+
+#define SYS_PMSLATFR_EL1		sys_reg(3, 0, 9, 9, 6)
+#define SYS_PMSLATFR_EL1_MINLAT_SHIFT	0
+
+/* Buffer controls */
+#define SYS_PMBLIMITR_EL1		sys_reg(3, 0, 9, 10, 0)
+#define SYS_PMBLIMITR_EL1_E_SHIFT	0
+#define SYS_PMBLIMITR_EL1_FM_SHIFT	1
+#define SYS_PMBLIMITR_EL1_FM_MASK	0x3UL
+#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ	(0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
+
+#define SYS_PMBPTR_EL1			sys_reg(3, 0, 9, 10, 1)
+
+/* Buffer error reporting */
+#define SYS_PMBSR_EL1			sys_reg(3, 0, 9, 10, 3)
+#define SYS_PMBSR_EL1_COLL_SHIFT	16
+#define SYS_PMBSR_EL1_S_SHIFT		17
+#define SYS_PMBSR_EL1_EA_SHIFT		18
+#define SYS_PMBSR_EL1_DL_SHIFT		19
+#define SYS_PMBSR_EL1_EC_SHIFT		26
+#define SYS_PMBSR_EL1_EC_MASK		0x3fUL
+
+#define SYS_PMBSR_EL1_EC_BUF		(0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define SYS_PMBSR_EL1_EC_FAULT_S1	(0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define SYS_PMBSR_EL1_EC_FAULT_S2	(0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
+
+#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT	0
+#define SYS_PMBSR_EL1_FAULT_FSC_MASK	0x3fUL
+
+#define SYS_PMBSR_EL1_BUF_BSC_SHIFT	0
+#define SYS_PMBSR_EL1_BUF_BSC_MASK	0x3fUL
+
+#define SYS_PMBSR_EL1_BUF_BSC_FULL	(0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+
+/*** End of Statistical Profiling Extension ***/
+
 #define SYS_PMINTENSET_EL1		sys_reg(3, 0, 9, 14, 1)
 #define SYS_PMINTENCLR_EL1		sys_reg(3, 0, 9, 14, 2)
 
@@ -250,6 +349,8 @@
 
 #define SYS_PMCCFILTR_EL0		sys_reg (3, 3, 14, 15, 7)
 
+#define SYS_ZCR_EL2			sys_reg(3, 4, 1, 2, 0)
+
 #define SYS_DACR32_EL2			sys_reg(3, 4, 3, 0, 0)
 #define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1)
 #define SYS_FPEXC32_EL2			sys_reg(3, 4, 5, 3, 0)
@@ -318,6 +419,10 @@
 #define SCTLR_EL1_CP15BEN	(1 << 5)
 
 /* id_aa64isar0 */
+#define ID_AA64ISAR0_DP_SHIFT		44
+#define ID_AA64ISAR0_SM4_SHIFT		40
+#define ID_AA64ISAR0_SM3_SHIFT		36
+#define ID_AA64ISAR0_SHA3_SHIFT		32
 #define ID_AA64ISAR0_RDM_SHIFT		28
 #define ID_AA64ISAR0_ATOMICS_SHIFT	20
 #define ID_AA64ISAR0_CRC32_SHIFT	16
@@ -332,6 +437,7 @@
 #define ID_AA64ISAR1_DPB_SHIFT		0
 
 /* id_aa64pfr0 */
+#define ID_AA64PFR0_SVE_SHIFT		32
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20
 #define ID_AA64PFR0_FP_SHIFT		16
@@ -340,6 +446,7 @@
 #define ID_AA64PFR0_EL1_SHIFT		4
 #define ID_AA64PFR0_EL0_SHIFT		0
 
+#define ID_AA64PFR0_SVE			0x1
 #define ID_AA64PFR0_FP_NI		0xf
 #define ID_AA64PFR0_FP_SUPPORTED	0x0
 #define ID_AA64PFR0_ASIMD_NI		0xf
@@ -441,6 +548,20 @@
 #endif
 
 
+/*
+ * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
+ * are reserved by the SVE architecture for future expansion of the LEN
+ * field, with compatible semantics.
+ */
+#define ZCR_ELx_LEN_SHIFT	0
+#define ZCR_ELx_LEN_SIZE	9
+#define ZCR_ELx_LEN_MASK	0x1ff
+
+#define CPACR_EL1_ZEN_EL1EN	(1 << 16) /* enable EL1 access */
+#define CPACR_EL1_ZEN_EL0EN	(1 << 17) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+
+
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL		(1UL << 31)
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index ddded6497a8a..eb431286bacd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -63,6 +63,8 @@ struct thread_info {
 void arch_setup_new_exec(void);
 #define arch_setup_new_exec     arch_setup_new_exec
 
+void arch_release_task_struct(struct task_struct *tsk);
+
 #endif
 
 /*
@@ -92,6 +94,8 @@ void arch_setup_new_exec(void);
 #define TIF_RESTORE_SIGMASK	20
 #define TIF_SINGLESTEP		21
 #define TIF_32BIT		22	/* 32bit process */
+#define TIF_SVE			23	/* Scalable Vector Extension in use */
+#define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -105,6 +109,7 @@ void arch_setup_new_exec(void);
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_32BIT		(1 << TIF_32BIT)
+#define _TIF_SVE		(1 << TIF_SVE)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index d131501c6222..1696f9de9359 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -34,9 +34,17 @@ struct undef_hook {
 
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
+void force_signal_inject(int signal, int code, struct pt_regs *regs,
+			 unsigned long address);
 
 void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
 
+/*
+ * Move regs->pc to next instruction and do necessary setup before it
+ * is executed.
+ */
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size);
+
 static inline int __in_irqentry_text(unsigned long ptr)
 {
 	return ptr >= (unsigned long)&__irqentry_text_start &&
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index b3fdeee739ea..cda76fa8b9b2 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -37,5 +37,11 @@
 #define HWCAP_FCMA		(1 << 14)
 #define HWCAP_LRCPC		(1 << 15)
 #define HWCAP_DCPOP		(1 << 16)
+#define HWCAP_SHA3		(1 << 17)
+#define HWCAP_SM3		(1 << 18)
+#define HWCAP_SM4		(1 << 19)
+#define HWCAP_ASIMDDP		(1 << 20)
+#define HWCAP_SHA512		(1 << 21)
+#define HWCAP_SVE		(1 << 22)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 67d4c33974e8..98c4ce55d9c3 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 #include <asm/hwcap.h>
+#include <asm/sigcontext.h>
 
 
 /*
@@ -47,7 +48,6 @@
 #define PSR_D_BIT	0x00000200
 #define PSR_PAN_BIT	0x00400000
 #define PSR_UAO_BIT	0x00800000
-#define PSR_Q_BIT	0x08000000
 #define PSR_V_BIT	0x10000000
 #define PSR_C_BIT	0x20000000
 #define PSR_Z_BIT	0x40000000
@@ -64,6 +64,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/prctl.h>
+
 /*
  * User structures for general purpose, floating point and debug registers.
  */
@@ -91,6 +93,141 @@ struct user_hwdebug_state {
 	}		dbg_regs[16];
 };
 
+/* SVE/FP/SIMD state (NT_ARM_SVE) */
+
+struct user_sve_header {
+	__u32 size; /* total meaningful regset content in bytes */
+	__u32 max_size; /* maximum possible size for this thread */
+	__u16 vl; /* current vector length */
+	__u16 max_vl; /* maximum possible vector length */
+	__u16 flags; /* SVE_PT_* flag bits */
+	__u16 __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK		(1 << 0)
+
+#define SVE_PT_REGS_FPSIMD		0
+#define SVE_PT_REGS_SVE			SVE_PT_REGS_MASK
+
+/*
+ * Common SVE_PT_* flags:
+ * These must be kept in sync with prctl interface in <linux/prctl.h>
+ */
+#define SVE_PT_VL_INHERIT		(PR_SVE_VL_INHERIT >> 16)
+#define SVE_PT_VL_ONEXEC		(PR_SVE_SET_VL_ONEXEC >> 16)
+
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header.  The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header:  SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET					\
+	((sizeof(struct user_sve_header) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state.  Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags)	(sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ *	x	type				description
+ *	-	----				-----------
+ *	ZREGS		\
+ *	ZREG		|
+ *	PREGS		| refer to <asm/sigcontext.h>
+ *	PREG		|
+ *	FFR		/
+ *
+ *	FPSR	uint32_t			FPSR
+ *	FPCR	uint32_t			FPCR
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_PT_SVE_ZREG_SIZE(vq)	SVE_SIG_ZREG_SIZE(vq)
+#define SVE_PT_SVE_PREG_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+#define SVE_PT_SVE_FFR_SIZE(vq)		SVE_SIG_FFR_SIZE(vq)
+#define SVE_PT_SVE_FPSR_SIZE		sizeof(__u32)
+#define SVE_PT_SVE_FPCR_SIZE		sizeof(__u32)
+
+#define __SVE_SIG_TO_PT(offset) \
+	((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
+
+#define SVE_PT_SVE_OFFSET		SVE_PT_REGS_OFFSET
+
+#define SVE_PT_SVE_ZREGS_OFFSET \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
+#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
+#define SVE_PT_SVE_ZREGS_SIZE(vq) \
+	(SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
+
+#define SVE_PT_SVE_PREGS_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
+	__SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
+#define SVE_PT_SVE_PREGS_SIZE(vq) \
+	(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
+		SVE_PT_SVE_PREGS_OFFSET(vq))
+
+#define SVE_PT_SVE_FFR_OFFSET(vq) \
+	__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
+
+#define SVE_PT_SVE_FPSR_OFFSET(vq)				\
+	((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) +	\
+			(SVE_VQ_BYTES - 1))			\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+#define SVE_PT_SVE_FPCR_OFFSET(vq) \
+	(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
+
+/*
+ * Any future extension appended after FPCR must be aligned to the next
+ * 128-bit boundary.
+ */
+
+#define SVE_PT_SVE_SIZE(vq, flags)					\
+	((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE		\
+			- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_PT_SIZE(vq, flags)						\
+	 (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ?		\
+		  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
+		: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index f6cc3061b1ae..dca8f8b5168b 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -17,6 +17,8 @@
 #ifndef _UAPI__ASM_SIGCONTEXT_H
 #define _UAPI__ASM_SIGCONTEXT_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 /*
@@ -42,10 +44,11 @@ struct sigcontext {
  *
  *	0x210		fpsimd_context
  *	 0x10		esr_context
+ *	0x8a0		sve_context (vl <= 64) (optional)
  *	 0x20		extra_context (optional)
  *	 0x10		terminator (null _aarch64_ctx)
  *
- *	0xdb0		(reserved for future allocation)
+ *	0x510		(reserved for future allocation)
  *
  * New records that can exceed this space need to be opt-in for userspace, so
  * that an expanded signal frame is not generated unexpectedly.  The mechanism
@@ -117,4 +120,119 @@ struct extra_context {
 	__u32 __reserved[3];
 };
 
+#define SVE_MAGIC	0x53564501
+
+struct sve_context {
+	struct _aarch64_ctx head;
+	__u16 vl;
+	__u16 __reserved[3];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The SVE architecture leaves space for future expansion of the
+ * vector length beyond its initial architectural limit of 2048 bits
+ * (16 quadwords).
+ *
+ * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
+ * terminology.
+ */
+#define SVE_VQ_BYTES		16	/* number of bytes per quadword */
+
+#define SVE_VQ_MIN		1
+#define SVE_VQ_MAX		512
+
+#define SVE_VL_MIN		(SVE_VQ_MIN * SVE_VQ_BYTES)
+#define SVE_VL_MAX		(SVE_VQ_MAX * SVE_VQ_BYTES)
+
+#define SVE_NUM_ZREGS		32
+#define SVE_NUM_PREGS		16
+
+#define sve_vl_valid(vl) \
+	((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
+#define sve_vq_from_vl(vl)	((vl) / SVE_VQ_BYTES)
+#define sve_vl_from_vq(vq)	((vq) * SVE_VQ_BYTES)
+
+/*
+ * If the SVE registers are currently live for the thread at signal delivery,
+ * sve_context.head.size >=
+ *	SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl))
+ * and the register data may be accessed using the SVE_SIG_*() macros.
+ *
+ * If sve_context.head.size <
+ *	SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve_context.vl)),
+ * the SVE registers were not live for the thread and no register data
+ * is included: in this case, the SVE_SIG_*() macros should not be
+ * used except for this check.
+ *
+ * The same convention applies when returning from a signal: a caller
+ * will need to remove or resize the sve_context block if it wants to
+ * make the SVE registers live when they were previously non-live or
+ * vice-versa.  This may require the caller to allocate fresh
+ * memory and/or move other context blocks in the signal frame.
+ *
+ * Changing the vector length during signal return is not permitted:
+ * sve_context.vl must equal the thread's current vector length when
+ * doing a sigreturn.
+ *
+ *
+ * Note: for all these macros, the "vq" argument denotes the SVE
+ * vector length in quadwords (i.e., units of 128 bits).
+ *
+ * The correct way to obtain vq is to use sve_vq_from_vl(vl).  The
+ * result is valid if and only if sve_vl_valid(vl) is true.  This is
+ * guaranteed for a struct sve_context written by the kernel.
+ *
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_SIG_x_OFFSET(args) is the start offset relative to
+ * the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the
+ * size in bytes:
+ *
+ *	x	type				description
+ *	-	----				-----------
+ *	REGS					the entire SVE context
+ *
+ *	ZREGS	__uint128_t[SVE_NUM_ZREGS][vq]	all Z-registers
+ *	ZREG	__uint128_t[vq]			individual Z-register Zn
+ *
+ *	PREGS	uint16_t[SVE_NUM_PREGS][vq]	all P-registers
+ *	PREG	uint16_t[vq]			individual P-register Pn
+ *
+ *	FFR	uint16_t[vq]			first-fault status register
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_SIG_ZREG_SIZE(vq)	((__u32)(vq) * SVE_VQ_BYTES)
+#define SVE_SIG_PREG_SIZE(vq)	((__u32)(vq) * (SVE_VQ_BYTES / 8))
+#define SVE_SIG_FFR_SIZE(vq)	SVE_SIG_PREG_SIZE(vq)
+
+#define SVE_SIG_REGS_OFFSET					\
+	((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1))	\
+		/ SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define SVE_SIG_ZREGS_OFFSET	SVE_SIG_REGS_OFFSET
+#define SVE_SIG_ZREG_OFFSET(vq, n) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
+#define SVE_SIG_ZREGS_SIZE(vq) \
+	(SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
+
+#define SVE_SIG_PREGS_OFFSET(vq) \
+	(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
+#define SVE_SIG_PREG_OFFSET(vq, n) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
+#define SVE_SIG_PREGS_SIZE(vq) \
+	(SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
+
+#define SVE_SIG_FFR_OFFSET(vq) \
+	(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
+
+#define SVE_SIG_REGS_SIZE(vq) \
+	(SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
+
+#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
+
+
 #endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 0029e13adb59..8265dd790895 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -11,8 +11,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
 
-CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
 # Object file lists.
 arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index d06fbe4cd38d..c33b5e4010ab 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -228,15 +228,7 @@ ret:
 	return ret;
 }
 
-static struct ctl_table ctl_abi[] = {
-	{
-		.procname = "abi",
-		.mode = 0555,
-	},
-	{ }
-};
-
-static void __init register_insn_emulation_sysctl(struct ctl_table *table)
+static void __init register_insn_emulation_sysctl(void)
 {
 	unsigned long flags;
 	int i = 0;
@@ -262,8 +254,7 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 	}
 	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
 
-	table->child = insns_sysctl;
-	register_sysctl_table(table);
+	register_sysctl("abi", insns_sysctl);
 }
 
 /*
@@ -431,7 +422,7 @@ ret:
 	pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n",
 			current->comm, (unsigned long)current->pid, regs->pc);
 
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, 4);
 	return 0;
 
 fault:
@@ -512,7 +503,7 @@ ret:
 	pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n",
 			current->comm, (unsigned long)current->pid, regs->pc);
 
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, 4);
 	return 0;
 }
 
@@ -586,14 +577,14 @@ static int compat_setend_handler(struct pt_regs *regs, u32 big_endian)
 static int a32_setend_handler(struct pt_regs *regs, u32 instr)
 {
 	int rc = compat_setend_handler(regs, (instr >> 9) & 1);
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, 4);
 	return rc;
 }
 
 static int t16_setend_handler(struct pt_regs *regs, u32 instr)
 {
 	int rc = compat_setend_handler(regs, (instr >> 3) & 1);
-	regs->pc += 2;
+	arm64_skip_faulting_instruction(regs, 2);
 	return rc;
 }
 
@@ -644,7 +635,7 @@ static int __init armv8_deprecated_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
 				  "arm64/isndep:starting",
 				  run_all_insn_set_hw_mode, NULL);
-	register_insn_emulation_sysctl(ctl_abi);
+	register_insn_emulation_sysctl();
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 21e2c95d24e7..c5ba0097887f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -27,6 +27,7 @@
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
+#include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
@@ -51,6 +52,21 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcaps);
 
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+	sys_caps_initialised = true;
+}
+
 static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
 {
 	/* file-wide pr_fmt adds "CPU features: " prefix */
@@ -107,7 +123,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
  * sync with the documentation of the CPU feature register ABI.
  */
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
@@ -117,34 +137,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
 	/* Linux doesn't care about the EL3 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
 	/* Linux shouldn't care about secure memory */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
 	/*
 	 * Differing PARange is fine as long as all peripherals and memory are mapped
 	 * within the minimum PARange of all CPUs
@@ -155,20 +176,20 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -193,14 +214,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
 };
 
 static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf),	/* InnerShr */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf),	/* InnerShr */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),	/* FCSE */
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0),	/* TCM */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* ShareLvl */
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf),	/* OuterShr */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),	/* PMSA */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* VMSA */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),	/* TCM */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),	/* ShareLvl */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf),	/* OuterShr */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),	/* PMSA */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* VMSA */
 	ARM64_FTR_END,
 };
 
@@ -221,8 +242,8 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_mvfr2[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),		/* FPMisc */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),		/* SIMDMisc */
 	ARM64_FTR_END,
 };
 
@@ -234,25 +255,25 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
 
 
 static const struct arm64_ftr_bits ftr_id_isar5[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),	/* ac2 */
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_pfr0[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* State1 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* State0 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),		/* State3 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),		/* State2 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),		/* State1 */
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),		/* State0 */
 	ARM64_FTR_END,
 };
 
@@ -268,6 +289,12 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = {
 	ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_zcr[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
+		ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0),	/* LEN */
+	ARM64_FTR_END,
+};
+
 /*
  * Common ftr bits for a 32bit register with all hidden, strict
  * attributes, with 4bit feature fields and a default safe value of
@@ -334,6 +361,7 @@ static const struct __ftr_reg_entry {
 	/* Op1 = 0, CRn = 0, CRm = 4 */
 	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
 	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),
+	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),
 
 	/* Op1 = 0, CRn = 0, CRm = 5 */
 	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -348,6 +376,9 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
 	ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
 
+	/* Op1 = 0, CRn = 1, CRm = 2 */
+	ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+
 	/* Op1 = 3, CRn = 0, CRm = 0 */
 	{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
 	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
@@ -485,6 +516,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
 
 	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
 		init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -505,6 +537,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
 	}
 
+	if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
+		sve_init_vq_map();
+	}
 }
 
 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -608,6 +644,9 @@ void update_cpu_features(int cpu,
 	taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
 				      info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
 
+	taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
+				      info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
+
 	/*
 	 * If we have AArch32, we care about 32-bit features for compat.
 	 * If the system doesn't support AArch32, don't update them.
@@ -655,6 +694,16 @@ void update_cpu_features(int cpu,
 					info->reg_mvfr2, boot->reg_mvfr2);
 	}
 
+	if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+		taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
+					info->reg_zcr, boot->reg_zcr);
+
+		/* Probe vector lengths, unless we already gave up on SVE */
+		if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
+		    !sys_caps_initialised)
+			sve_update_vq_map();
+	}
+
 	/*
 	 * Mismatched CPU features are a recipe for disaster. Don't even
 	 * pretend to support them.
@@ -900,6 +949,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 	},
 #endif
+#ifdef CONFIG_ARM64_SVE
+	{
+		.desc = "Scalable Vector Extension",
+		.capability = ARM64_SVE,
+		.def_scope = SCOPE_SYSTEM,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR0_SVE_SHIFT,
+		.min_field_value = ID_AA64PFR0_SVE,
+		.matches = has_cpuid_feature,
+		.enable = sve_kernel_enable,
+	},
+#endif /* CONFIG_ARM64_SVE */
 	{},
 };
 
@@ -921,9 +983,14 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -932,6 +999,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
+#ifdef CONFIG_ARM64_SVE
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
+#endif
 	{},
 };
 
@@ -1041,21 +1111,6 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 }
 
 /*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier.
- */
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
-{
-	sys_caps_initialised = true;
-}
-
-/*
  * Check for CPU features that are used in early boot
  * based on the Boot CPU value.
  */
@@ -1097,6 +1152,23 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
 	}
 }
 
+static void verify_sve_features(void)
+{
+	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+	u64 zcr = read_zcr_features();
+
+	unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
+	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
+
+	if (len < safe_len || sve_verify_vq_map()) {
+		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+			smp_processor_id());
+		cpu_die_early();
+	}
+
+	/* Add checks on other ZCR bits here if necessary */
+}
+
 /*
  * Run through the enabled system capabilities and enable() it on this CPU.
  * The capabilities were decided based on the available CPUs at the boot time.
@@ -1110,8 +1182,12 @@ static void verify_local_cpu_capabilities(void)
 	verify_local_cpu_errata_workarounds();
 	verify_local_cpu_features(arm64_features);
 	verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
 	if (system_supports_32bit_el0())
 		verify_local_elf_hwcaps(compat_elf_hwcaps);
+
+	if (system_supports_sve())
+		verify_sve_features();
 }
 
 void check_local_cpu_capabilities(void)
@@ -1189,6 +1265,8 @@ void __init setup_cpu_features(void)
 	if (system_supports_32bit_el0())
 		setup_elf_hwcaps(compat_elf_hwcaps);
 
+	sve_setup();
+
 	/* Advertise that we have computed the system capabilities */
 	set_sys_caps_initialised();
 
@@ -1287,7 +1365,7 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn)
 	if (!rc) {
 		dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
 		pt_regs_write_reg(regs, dst, val);
-		regs->pc += 4;
+		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 	}
 
 	return rc;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 311885962830..1e2554543506 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -19,6 +19,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 
 #include <linux/bitops.h>
 #include <linux/bug.h>
@@ -69,6 +70,12 @@ static const char *const hwcap_str[] = {
 	"fcma",
 	"lrcpc",
 	"dcpop",
+	"sha3",
+	"sm3",
+	"sm4",
+	"asimddp",
+	"sha512",
+	"sve",
 	NULL
 };
 
@@ -326,6 +333,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
 	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
 	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+	info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
 
 	/* Update the 32bit ID registers only if AArch32 is implemented */
 	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
@@ -348,6 +356,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 		info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
 	}
 
+	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+	    id_aa64pfr0_sve(info->reg_id_aa64pfr0))
+		info->reg_zcr = read_zcr_features();
+
 	cpuinfo_detect_icache_policy(info);
 }
 
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c7ef99904934..a88b6ccebbb4 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -30,6 +30,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 
@@ -46,9 +47,9 @@ u8 debug_monitors_arch(void)
 static void mdscr_write(u32 mdscr)
 {
 	unsigned long flags;
-	local_dbg_save(flags);
+	flags = local_daif_save();
 	write_sysreg(mdscr, mdscr_el1);
-	local_dbg_restore(flags);
+	local_daif_restore(flags);
 }
 NOKPROBE_SYMBOL(mdscr_write);
 
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..73f17bffcd23 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,20 @@ ENTRY(fpsimd_load_state)
 	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_ARM64_SVE
+ENTRY(sve_save_state)
+	sve_save 0, x1, 2
+	ret
+ENDPROC(sve_save_state)
+
+ENTRY(sve_load_state)
+	sve_load 0, x1, x2, 3
+	ret
+ENDPROC(sve_load_state)
+
+ENTRY(sve_get_vl)
+	_sve_rdvl	0, 1
+	ret
+ENDPROC(sve_get_vl)
+#endif /* CONFIG_ARM64_SVE */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index e1be42e11ff5..1175f5827ae1 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -108,13 +108,8 @@ ENTRY(_mcount)
 	mcount_get_lr	x1		//       function's lr (= parent's pc)
 	blr	x2			//   (*ftrace_trace_function)(pc, lr);
 
-#ifndef CONFIG_FUNCTION_GRAPH_TRACER
-skip_ftrace_call:			//   return;
-	mcount_exit			// }
-#else
-	mcount_exit			//   return;
-					// }
-skip_ftrace_call:
+skip_ftrace_call:			// }
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	ldr_l	x2, ftrace_graph_return
 	cmp	x0, x2			//   if ((ftrace_graph_return
 	b.ne	ftrace_graph_caller	//        != ftrace_stub)
@@ -123,9 +118,8 @@ skip_ftrace_call:
 	adr_l	x0, ftrace_graph_entry_stub //     != ftrace_graph_entry_stub))
 	cmp	x0, x2
 	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
-
-	mcount_exit
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+	mcount_exit
 ENDPROC(_mcount)
 
 #else /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e1c59d4008a8..6d14b8f29b5f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,7 +28,7 @@
 #include <asm/errno.h>
 #include <asm/esr.h>
 #include <asm/irq.h>
-#include <asm/memory.h>
+#include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/asm-uaccess.h>
@@ -221,6 +221,8 @@ alternative_else_nop_endif
 
 	.macro	kernel_exit, el
 	.if	\el != 0
+	disable_daif
+
 	/* Restore the task's original addr_limit. */
 	ldr	x20, [sp, #S_ORIG_ADDR_LIMIT]
 	str	x20, [tsk, #TSK_TI_ADDR_LIMIT]
@@ -373,18 +375,18 @@ ENTRY(vectors)
 	kernel_ventry	el1_sync			// Synchronous EL1h
 	kernel_ventry	el1_irq				// IRQ EL1h
 	kernel_ventry	el1_fiq_invalid			// FIQ EL1h
-	kernel_ventry	el1_error_invalid		// Error EL1h
+	kernel_ventry	el1_error			// Error EL1h
 
 	kernel_ventry	el0_sync			// Synchronous 64-bit EL0
 	kernel_ventry	el0_irq				// IRQ 64-bit EL0
 	kernel_ventry	el0_fiq_invalid			// FIQ 64-bit EL0
-	kernel_ventry	el0_error_invalid		// Error 64-bit EL0
+	kernel_ventry	el0_error			// Error 64-bit EL0
 
 #ifdef CONFIG_COMPAT
 	kernel_ventry	el0_sync_compat			// Synchronous 32-bit EL0
 	kernel_ventry	el0_irq_compat			// IRQ 32-bit EL0
 	kernel_ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0
-	kernel_ventry	el0_error_invalid_compat	// Error 32-bit EL0
+	kernel_ventry	el0_error_compat		// Error 32-bit EL0
 #else
 	kernel_ventry	el0_sync_invalid		// Synchronous 32-bit EL0
 	kernel_ventry	el0_irq_invalid			// IRQ 32-bit EL0
@@ -453,10 +455,6 @@ ENDPROC(el0_error_invalid)
 el0_fiq_invalid_compat:
 	inv_entry 0, BAD_FIQ, 32
 ENDPROC(el0_fiq_invalid_compat)
-
-el0_error_invalid_compat:
-	inv_entry 0, BAD_ERROR, 32
-ENDPROC(el0_error_invalid_compat)
 #endif
 
 el1_sync_invalid:
@@ -508,24 +506,18 @@ el1_da:
 	 * Data abort handling
 	 */
 	mrs	x3, far_el1
-	enable_dbg
-	// re-enable interrupts if they were enabled in the aborted context
-	tbnz	x23, #7, 1f			// PSR_I_BIT
-	enable_irq
-1:
+	inherit_daif	pstate=x23, tmp=x2
 	clear_address_tag x0, x3
 	mov	x2, sp				// struct pt_regs
 	bl	do_mem_abort
 
-	// disable interrupts before pulling preserved data off the stack
-	disable_irq
 	kernel_exit 1
 el1_sp_pc:
 	/*
 	 * Stack or PC alignment exception handling
 	 */
 	mrs	x0, far_el1
-	enable_dbg
+	inherit_daif	pstate=x23, tmp=x2
 	mov	x2, sp
 	bl	do_sp_pc_abort
 	ASM_BUG()
@@ -533,7 +525,7 @@ el1_undef:
 	/*
 	 * Undefined instruction
 	 */
-	enable_dbg
+	inherit_daif	pstate=x23, tmp=x2
 	mov	x0, sp
 	bl	do_undefinstr
 	ASM_BUG()
@@ -550,7 +542,7 @@ el1_dbg:
 	kernel_exit 1
 el1_inv:
 	// TODO: add support for undefined instructions in kernel mode
-	enable_dbg
+	inherit_daif	pstate=x23, tmp=x2
 	mov	x0, sp
 	mov	x2, x1
 	mov	x1, #BAD_SYNC
@@ -561,7 +553,7 @@ ENDPROC(el1_sync)
 	.align	6
 el1_irq:
 	kernel_entry 1
-	enable_dbg
+	enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
@@ -607,6 +599,8 @@ el0_sync:
 	b.eq	el0_ia
 	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
 	b.eq	el0_fpsimd_acc
+	cmp	x24, #ESR_ELx_EC_SVE		// SVE access
+	b.eq	el0_sve_acc
 	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
 	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
@@ -658,6 +652,7 @@ el0_svc_compat:
 	/*
 	 * AArch32 syscall handling
 	 */
+	ldr	x16, [tsk, #TSK_TI_FLAGS]	// load thread flags
 	adrp	stbl, compat_sys_call_table	// load compat syscall table pointer
 	mov	wscno, w7			// syscall number in w7 (r7)
 	mov     wsc_nr, #__NR_compat_syscalls
@@ -667,6 +662,10 @@ el0_svc_compat:
 el0_irq_compat:
 	kernel_entry 0, 32
 	b	el0_irq_naked
+
+el0_error_compat:
+	kernel_entry 0, 32
+	b	el0_error_naked
 #endif
 
 el0_da:
@@ -674,8 +673,7 @@ el0_da:
 	 * Data abort handling
 	 */
 	mrs	x26, far_el1
-	// enable interrupts before calling the main handler
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit
 	clear_address_tag x0, x26
 	mov	x1, x25
@@ -687,8 +685,7 @@ el0_ia:
 	 * Instruction abort handling
 	 */
 	mrs	x26, far_el1
-	// enable interrupts before calling the main handler
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit
 	mov	x0, x26
 	mov	x1, x25
@@ -699,17 +696,27 @@ el0_fpsimd_acc:
 	/*
 	 * Floating Point or Advanced SIMD access
 	 */
-	enable_dbg
+	enable_daif
 	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
 	bl	do_fpsimd_acc
 	b	ret_to_user
+el0_sve_acc:
+	/*
+	 * Scalable Vector Extension access
+	 */
+	enable_daif
+	ct_user_exit
+	mov	x0, x25
+	mov	x1, sp
+	bl	do_sve_acc
+	b	ret_to_user
 el0_fpsimd_exc:
 	/*
-	 * Floating Point or Advanced SIMD exception
+	 * Floating Point, Advanced SIMD or SVE exception
 	 */
-	enable_dbg
+	enable_daif
 	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
@@ -720,8 +727,7 @@ el0_sp_pc:
 	 * Stack or PC alignment exception handling
 	 */
 	mrs	x26, far_el1
-	// enable interrupts before calling the main handler
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit
 	mov	x0, x26
 	mov	x1, x25
@@ -732,8 +738,7 @@ el0_undef:
 	/*
 	 * Undefined instruction
 	 */
-	// enable interrupts before calling the main handler
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit
 	mov	x0, sp
 	bl	do_undefinstr
@@ -742,7 +747,7 @@ el0_sys:
 	/*
 	 * System instructions, for trapped cache maintenance instructions
 	 */
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
@@ -757,11 +762,11 @@ el0_dbg:
 	mov	x1, x25
 	mov	x2, sp
 	bl	do_debug_exception
-	enable_dbg
+	enable_daif
 	ct_user_exit
 	b	ret_to_user
 el0_inv:
-	enable_dbg
+	enable_daif
 	ct_user_exit
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
@@ -774,7 +779,7 @@ ENDPROC(el0_sync)
 el0_irq:
 	kernel_entry 0
 el0_irq_naked:
-	enable_dbg
+	enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
@@ -788,12 +793,34 @@ el0_irq_naked:
 	b	ret_to_user
 ENDPROC(el0_irq)
 
+el1_error:
+	kernel_entry 1
+	mrs	x1, esr_el1
+	enable_dbg
+	mov	x0, sp
+	bl	do_serror
+	kernel_exit 1
+ENDPROC(el1_error)
+
+el0_error:
+	kernel_entry 0
+el0_error_naked:
+	mrs	x1, esr_el1
+	enable_dbg
+	mov	x0, sp
+	bl	do_serror
+	enable_daif
+	ct_user_exit
+	b	ret_to_user
+ENDPROC(el0_error)
+
+
 /*
  * This is the fast syscall return path.  We do as little as possible here,
  * and this includes saving x0 back into the kernel stack.
  */
 ret_fast_syscall:
-	disable_irq				// disable interrupts
+	disable_daif
 	str	x0, [sp, #S_X0]			// returned x0
 	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for syscall tracing
 	and	x2, x1, #_TIF_SYSCALL_WORK
@@ -803,7 +830,7 @@ ret_fast_syscall:
 	enable_step_tsk x1, x2
 	kernel_exit 0
 ret_fast_syscall_trace:
-	enable_irq				// enable interrupts
+	enable_daif
 	b	__sys_trace_return_skipped	// we already saved x0
 
 /*
@@ -821,7 +848,7 @@ work_pending:
  * "slow" syscall return path.
  */
 ret_to_user:
-	disable_irq				// disable interrupts
+	disable_daif
 	ldr	x1, [tsk, #TSK_TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
@@ -835,16 +862,37 @@ ENDPROC(ret_to_user)
  */
 	.align	6
 el0_svc:
+	ldr	x16, [tsk, #TSK_TI_FLAGS]	// load thread flags
 	adrp	stbl, sys_call_table		// load syscall table pointer
 	mov	wscno, w8			// syscall number in w8
 	mov	wsc_nr, #__NR_syscalls
+
+#ifdef CONFIG_ARM64_SVE
+alternative_if_not ARM64_SVE
+	b	el0_svc_naked
+alternative_else_nop_endif
+	tbz	x16, #TIF_SVE, el0_svc_naked	// Skip unless TIF_SVE set:
+	bic	x16, x16, #_TIF_SVE		// discard SVE state
+	str	x16, [tsk, #TSK_TI_FLAGS]
+
+	/*
+	 * task_fpsimd_load() won't be called to update CPACR_EL1 in
+	 * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
+	 * happens if a context switch or kernel_neon_begin() or context
+	 * modification (sigreturn, ptrace) intervenes.
+	 * So, ensure that CPACR_EL1 is already correct for the fast-path case:
+	 */
+	mrs	x9, cpacr_el1
+	bic	x9, x9, #CPACR_EL1_ZEN_EL0EN	// disable SVE for el0
+	msr	cpacr_el1, x9			// synchronised by eret to el0
+#endif
+
 el0_svc_naked:					// compat entry point
 	stp	x0, xscno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
-	enable_dbg_and_irq
+	enable_daif
 	ct_user_exit 1
 
-	ldr	x16, [tsk, #TSK_TI_FLAGS]	// check for syscall hooks
-	tst	x16, #_TIF_SYSCALL_WORK
+	tst	x16, #_TIF_SYSCALL_WORK		// check for syscall hooks
 	b.ne	__sys_trace
 	cmp     wscno, wsc_nr			// check upper syscall limit
 	b.hs	ni_sys
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5d547deb6996..143b3e72c25e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,19 +17,34 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bitmap.h>
 #include <linux/bottom_half.h>
+#include <linux/bug.h>
+#include <linux/cache.h>
+#include <linux/compat.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
+#include <linux/prctl.h>
 #include <linux/preempt.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/task_stack.h>
 #include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
 #include <asm/simd.h>
+#include <asm/sigcontext.h>
+#include <asm/sysreg.h>
+#include <asm/traps.h>
 
 #define FPEXC_IOF	(1 << 0)
 #define FPEXC_DZF	(1 << 1)
@@ -39,6 +54,8 @@
 #define FPEXC_IDF	(1 << 7)
 
 /*
+ * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
+ *
  * In order to reduce the number of times the FPSIMD state is needlessly saved
  * and restored, we need to keep track of two things:
  * (a) for each task, we need to remember which CPU was the last one to have
@@ -99,10 +116,741 @@
  */
 static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
 
+/* Default VL for tasks that don't set it explicitly: */
+static int sve_default_vl = -1;
+
+#ifdef CONFIG_ARM64_SVE
+
+/* Maximum supported vector length across all CPUs (initially poisoned) */
+int __ro_after_init sve_max_vl = -1;
+/* Set of available vector lengths, as vq_to_bit(vq): */
+static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+static void __percpu *efi_sve_state;
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Dummy declaration for code that will be optimised out: */
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern void __percpu *efi_sve_state;
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+/*
+ * Call __sve_free() directly only if you know task can't be scheduled
+ * or preempted.
+ */
+static void __sve_free(struct task_struct *task)
+{
+	kfree(task->thread.sve_state);
+	task->thread.sve_state = NULL;
+}
+
+static void sve_free(struct task_struct *task)
+{
+	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+	__sve_free(task);
+}
+
+
+/* Offset of FFR in the SVE register dump */
+static size_t sve_ffr_offset(int vl)
+{
+	return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+}
+
+static void *sve_pffr(struct task_struct *task)
+{
+	return (char *)task->thread.sve_state +
+		sve_ffr_offset(task->thread.sve_vl);
+}
+
+static void change_cpacr(u64 val, u64 mask)
+{
+	u64 cpacr = read_sysreg(CPACR_EL1);
+	u64 new = (cpacr & ~mask) | val;
+
+	if (new != cpacr)
+		write_sysreg(new, CPACR_EL1);
+}
+
+static void sve_user_disable(void)
+{
+	change_cpacr(0, CPACR_EL1_ZEN_EL0EN);
+}
+
+static void sve_user_enable(void)
+{
+	change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN);
+}
+
+/*
+ * TIF_SVE controls whether a task can use SVE without trapping while
+ * in userspace, and also the way a task's FPSIMD/SVE state is stored
+ * in thread_struct.
+ *
+ * The kernel uses this flag to track whether a user task is actively
+ * using SVE, and therefore whether full SVE register state needs to
+ * be tracked.  If not, the cheaper FPSIMD context handling code can
+ * be used instead of the more costly SVE equivalents.
+ *
+ *  * TIF_SVE set:
+ *
+ *    The task can execute SVE instructions while in userspace without
+ *    trapping to the kernel.
+ *
+ *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] of the
+ *    corresponding Zn), P0-P15 and FFR are encoded in
+ *    task->thread.sve_state, formatted appropriately for vector
+ *    length task->thread.sve_vl.
+ *
+ *    task->thread.sve_state must point to a valid buffer at least
+ *    sve_state_size(task) bytes in size.
+ *
+ *    During any syscall, the kernel may optionally clear TIF_SVE and
+ *    discard the vector state except for the FPSIMD subset.
+ *
+ *  * TIF_SVE clear:
+ *
+ *    An attempt by the user task to execute an SVE instruction causes
+ *    do_sve_acc() to be called, which does some preparation and then
+ *    sets TIF_SVE.
+ *
+ *    When stored, FPSIMD registers V0-V31 are encoded in
+ *    task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ *    logically zero but not stored anywhere; P0-P15 and FFR are not
+ *    stored and have unspecified values from userspace's point of
+ *    view.  For hygiene purposes, the kernel zeroes them on next use,
+ *    but userspace is discouraged from relying on this.
+ *
+ *    task->thread.sve_state does not need to be non-NULL, valid or any
+ *    particular size: it must not be dereferenced.
+ *
+ *  * FPSR and FPCR are always stored in task->fpsimd_state irrespective of
+ *    whether TIF_SVE is clear or set, since these are not vector length
+ *    dependent.
+ */
+
+/*
+ * Update current's FPSIMD/SVE registers from thread_struct.
+ *
+ * This function should be called only when the FPSIMD/SVE state in
+ * thread_struct is known to be up to date, when preparing to enter
+ * userspace.
+ *
+ * Softirqs (and preemption) must be disabled.
+ */
+static void task_fpsimd_load(void)
+{
+	WARN_ON(!in_softirq() && !irqs_disabled());
+
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+		sve_load_state(sve_pffr(current),
+			       &current->thread.fpsimd_state.fpsr,
+			       sve_vq_from_vl(current->thread.sve_vl) - 1);
+	else
+		fpsimd_load_state(&current->thread.fpsimd_state);
+
+	if (system_supports_sve()) {
+		/* Toggle SVE trapping for userspace if needed */
+		if (test_thread_flag(TIF_SVE))
+			sve_user_enable();
+		else
+			sve_user_disable();
+
+		/* Serialised by exception return to user */
+	}
+}
+
+/*
+ * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
+ * with respect to the CPU registers.
+ *
+ * Softirqs (and preemption) must be disabled.
+ */
+static void task_fpsimd_save(void)
+{
+	WARN_ON(!in_softirq() && !irqs_disabled());
+
+	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
+			if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
+				/*
+				 * Can't save the user regs, so current would
+				 * re-enter user with corrupt state.
+				 * There's no way to recover, so kill it:
+				 */
+				force_signal_inject(
+					SIGKILL, 0, current_pt_regs(), 0);
+				return;
+			}
+
+			sve_save_state(sve_pffr(current),
+				       &current->thread.fpsimd_state.fpsr);
+		} else
+			fpsimd_save_state(&current->thread.fpsimd_state);
+	}
+}
+
+/*
+ * Helpers to translate bit indices in sve_vq_map to VQ values (and
+ * vice versa).  This allows find_next_bit() to be used to find the
+ * _maximum_ VQ not exceeding a certain value.
+ */
+
+static unsigned int vq_to_bit(unsigned int vq)
+{
+	return SVE_VQ_MAX - vq;
+}
+
+static unsigned int bit_to_vq(unsigned int bit)
+{
+	if (WARN_ON(bit >= SVE_VQ_MAX))
+		bit = SVE_VQ_MAX - 1;
+
+	return SVE_VQ_MAX - bit;
+}
+
+/*
+ * All vector length selection from userspace comes through here.
+ * We're on a slow path, so some sanity-checks are included.
+ * If things go wrong there's a bug somewhere, but try to fall back to a
+ * safe choice.
+ */
+static unsigned int find_supported_vector_length(unsigned int vl)
+{
+	int bit;
+	int max_vl = sve_max_vl;
+
+	if (WARN_ON(!sve_vl_valid(vl)))
+		vl = SVE_VL_MIN;
+
+	if (WARN_ON(!sve_vl_valid(max_vl)))
+		max_vl = SVE_VL_MIN;
+
+	if (vl > max_vl)
+		vl = max_vl;
+
+	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
+			    vq_to_bit(sve_vq_from_vl(vl)));
+	return sve_vl_from_vq(bit_to_vq(bit));
+}
+
+#ifdef CONFIG_SYSCTL
+
+static int sve_proc_do_default_vl(struct ctl_table *table, int write,
+				  void __user *buffer, size_t *lenp,
+				  loff_t *ppos)
+{
+	int ret;
+	int vl = sve_default_vl;
+	struct ctl_table tmp_table = {
+		.data = &vl,
+		.maxlen = sizeof(vl),
+	};
+
+	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	/* Writing -1 has the special meaning "set to max": */
+	if (vl == -1) {
+		/* Fail safe if sve_max_vl wasn't initialised */
+		if (WARN_ON(!sve_vl_valid(sve_max_vl)))
+			vl = SVE_VL_MIN;
+		else
+			vl = sve_max_vl;
+
+		goto chosen;
+	}
+
+	if (!sve_vl_valid(vl))
+		return -EINVAL;
+
+	vl = find_supported_vector_length(vl);
+chosen:
+	sve_default_vl = vl;
+	return 0;
+}
+
+static struct ctl_table sve_default_vl_table[] = {
+	{
+		.procname	= "sve_default_vector_length",
+		.mode		= 0644,
+		.proc_handler	= sve_proc_do_default_vl,
+	},
+	{ }
+};
+
+static int __init sve_sysctl_init(void)
+{
+	if (system_supports_sve())
+		if (!register_sysctl("abi", sve_default_vl_table))
+			return -EINVAL;
+
+	return 0;
+}
+
+#else /* ! CONFIG_SYSCTL */
+static int __init sve_sysctl_init(void) { return 0; }
+#endif /* ! CONFIG_SYSCTL */
+
+#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
+	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
+
+/*
+ * Transfer the FPSIMD state in task->thread.fpsimd_state to
+ * task->thread.sve_state.
+ *
+ * Task can be a non-runnable task, or current.  In the latter case,
+ * softirqs (and preemption) must be disabled.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.fpsimd_state must be up to date before calling this function.
+ */
+static void fpsimd_to_sve(struct task_struct *task)
+{
+	unsigned int vq;
+	void *sst = task->thread.sve_state;
+	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	unsigned int i;
+
+	if (!system_supports_sve())
+		return;
+
+	vq = sve_vq_from_vl(task->thread.sve_vl);
+	for (i = 0; i < 32; ++i)
+		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+		       sizeof(fst->vregs[i]));
+}
+
+/*
+ * Transfer the SVE state in task->thread.sve_state to
+ * task->thread.fpsimd_state.
+ *
+ * Task can be a non-runnable task, or current.  In the latter case,
+ * softirqs (and preemption) must be disabled.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.sve_state must be up to date before calling this function.
+ */
+static void sve_to_fpsimd(struct task_struct *task)
+{
+	unsigned int vq;
+	void const *sst = task->thread.sve_state;
+	struct fpsimd_state *fst = &task->thread.fpsimd_state;
+	unsigned int i;
+
+	if (!system_supports_sve())
+		return;
+
+	vq = sve_vq_from_vl(task->thread.sve_vl);
+	for (i = 0; i < 32; ++i)
+		memcpy(&fst->vregs[i], ZREG(sst, vq, i),
+		       sizeof(fst->vregs[i]));
+}
+
+#ifdef CONFIG_ARM64_SVE
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+size_t sve_state_size(struct task_struct const *task)
+{
+	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
+}
+
+/*
+ * Ensure that task->thread.sve_state is allocated and sufficiently large.
+ *
+ * This function should be used only in preparation for replacing
+ * task->thread.sve_state with new data.  The memory is always zeroed
+ * (sized by task's own vector length, not current's) so that stale
+ * data cannot show through.  This is done for testability and
+ * predictability: except in the do_sve_acc() case, there is no ABI
+ * requirement to hide stale data written previously by task.
+ */
+void sve_alloc(struct task_struct *task)
+{
+	if (task->thread.sve_state) {
+		memset(task->thread.sve_state, 0, sve_state_size(task));
+		return;
+	}
+
+	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
+	task->thread.sve_state =
+		kzalloc(sve_state_size(task), GFP_KERNEL);
+
+	/*
+	 * If future SVE revisions can have larger vectors though,
+	 * this may cease to be true:
+	 */
+	BUG_ON(!task->thread.sve_state);
+}
+
+
+/*
+ * Ensure that task->thread.sve_state is up to date with respect to
+ * the user task, irrespective of whether SVE is in use or not.
+ *
+ * This should only be called by ptrace.  task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void fpsimd_sync_to_sve(struct task_struct *task)
+{
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		fpsimd_to_sve(task);
+}
+
+/*
+ * Ensure that task->thread.fpsimd_state is up to date with respect to
+ * the user task, irrespective of whether SVE is in use or not.
+ *
+ * This should only be called by ptrace.  task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void sve_sync_to_fpsimd(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_SVE))
+		sve_to_fpsimd(task);
+}
+
+/*
+ * Ensure that task->thread.sve_state is up to date with respect to
+ * the task->thread.fpsimd_state.
+ *
+ * This should only be called by ptrace to merge new FPSIMD register
+ * values into a task for which SVE is currently active.
+ * task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ * task->thread.fpsimd_state must already have been initialised with
+ * the new FPSIMD register values to be merged in.
+ */
+void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+{
+	unsigned int vq;
+	void *sst = task->thread.sve_state;
+	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	unsigned int i;
+
+	if (!test_tsk_thread_flag(task, TIF_SVE))
+		return;
+
+	vq = sve_vq_from_vl(task->thread.sve_vl);
+
+	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
+
+	for (i = 0; i < 32; ++i)
+		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
+		       sizeof(fst->vregs[i]));
+}
+
+int sve_set_vector_length(struct task_struct *task,
+			  unsigned long vl, unsigned long flags)
+{
+	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
+				     PR_SVE_SET_VL_ONEXEC))
+		return -EINVAL;	/* a flag other than the two accepted is set */
+
+	if (!sve_vl_valid(vl))
+		return -EINVAL;	/* vl fails sve_vl_valid() */
+
+	/*
+	 * Clamp to the maximum vector length that VL-agnostic SVE code can
+	 * work with.  A flag may be assigned in the future to allow setting
+	 * of larger vector lengths without confusing older software.
+	 */
+	if (vl > SVE_VL_ARCH_MAX)
+		vl = SVE_VL_ARCH_MAX;
+
+	vl = find_supported_vector_length(vl);
+
+	if (flags & (PR_SVE_VL_INHERIT |
+		     PR_SVE_SET_VL_ONEXEC))
+		task->thread.sve_vl_onexec = vl;
+	else
+		/* Reset VL to system default on next exec: */
+		task->thread.sve_vl_onexec = 0;
+
+	/* Only actually set the VL if not deferred: */
+	if (flags & PR_SVE_SET_VL_ONEXEC)
+		goto out;	/* deferred: thread.sve_vl is left as it is */
+
+	if (vl == task->thread.sve_vl)
+		goto out;	/* VL unchanged: skip the state conversion */
+
+	/*
+	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
+	 * write any live register state back to task_struct, and convert to a
+	 * non-SVE thread.
+	 */
+	if (task == current) {
+		local_bh_disable();
+
+		task_fpsimd_save();
+		set_thread_flag(TIF_FOREIGN_FPSTATE);
+	}
+
+	fpsimd_flush_task_state(task);
+	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+		sve_to_fpsimd(task);
+
+	if (task == current)
+		local_bh_enable();
+
+	/*
+	 * Force reallocation of task SVE state to the correct size
+	 * on next use:
+	 */
+	sve_free(task);
+
+	task->thread.sve_vl = vl;
+
+out:
+	/* The inherit flag is updated on every successful call: */
+	if (flags & PR_SVE_VL_INHERIT)
+		set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+	else
+		clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+
+	return 0;
+}
+
+/*
+ * Encode the current vector length and flags for return.
+ * This is only required for prctl(): ptrace has separate fields.
+ *
+ * flags are as for sve_set_vector_length().
+ */
+static int sve_prctl_status(unsigned long flags)
+{
+	int status = current->thread.sve_vl;
+
+	/* A deferred (on-exec) request reports the on-exec length instead: */
+	if (flags & PR_SVE_SET_VL_ONEXEC)
+		status = current->thread.sve_vl_onexec;
+
+	/* Fold the task's inherit flag into the returned word: */
+	if (test_thread_flag(TIF_SVE_VL_INHERIT))
+		status |= PR_SVE_VL_INHERIT;
+
+	return status;
+}
+
+/* PR_SVE_SET_VL */
+int sve_set_current_vl(unsigned long arg)
+{
+	/*
+	 * The bits of arg under PR_SVE_VL_LEN_MASK are the requested length;
+	 * every bit of arg not set in that length is passed on as a flag.
+	 */
+	unsigned long const vl = arg & PR_SVE_VL_LEN_MASK;
+	unsigned long const flags = arg & ~vl;
+	int err;
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	err = sve_set_vector_length(current, vl, flags);
+
+	return err ? err : sve_prctl_status(flags);
+}
+
+/* PR_SVE_GET_VL */
+int sve_get_current_vl(void)
+{
+	/* flags == 0 selects the current VL rather than the on-exec VL: */
+	if (system_supports_sve())
+		return sve_prctl_status(0);
+	return -EINVAL;
+}
+
+/*
+ * Bitmap for temporary storage of the per-CPU set of supported vector lengths
+ * during secondary boot.
+ */
+static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
+
+static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
+{
+	unsigned int vq, vl;
+	unsigned long zcr;
+
+	bitmap_zero(map, SVE_VQ_MAX);	/* start from an empty set */
+
+	zcr = ZCR_ELx_LEN_MASK;
+	zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr; /* ZCR_EL1 minus LEN bits */
+
+	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
+		write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
+		vl = sve_get_vl();
+		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
+		set_bit(vq_to_bit(vq), map);
+	}
+}
+
+void __init sve_init_vq_map(void)
+{
+	sve_probe_vqs(sve_vq_map);	/* probe straight into the main map */
+}
+
+/*
+ * If we haven't committed to the set of supported VQs yet, filter out
+ * those not supported by the current CPU.
+ */
+void sve_update_vq_map(void)
+{
+	sve_probe_vqs(sve_secondary_vq_map);	/* set probed on this CPU */
+	bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+}
+
+/* Check whether the current CPU supports all VQs in the committed set */
+int sve_verify_vq_map(void)
+{
+	sve_probe_vqs(sve_secondary_vq_map);
+
+	/* Leave in the scratch map only the committed VQs this CPU lacks: */
+	bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
+		      SVE_VQ_MAX);
+
+	if (bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX))
+		return 0;
+
+	pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
+		smp_processor_id());
+	return -EINVAL;
+}
+
+static void __init sve_efi_setup(void)
+{
+	if (!IS_ENABLED(CONFIG_EFI))
+		return;
+
+	/*
+	 * The per-CPU buffer allocated here is what __efi_fpsimd_begin()
+	 * and __efi_fpsimd_end() use to preserve SVE state.
+	 *
+	 * An invalid sve_max_vl or a failed allocation leaves nothing
+	 * sensible to do from a void __init function, so both cases
+	 * are treated as fatal.
+	 */
+	if (sve_vl_valid(sve_max_vl)) {
+		efi_sve_state = __alloc_percpu(
+			SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)),
+			SVE_VQ_BYTES);
+		if (efi_sve_state)
+			return;
+	}
+
+	panic("Cannot allocate percpu memory for EFI SVE save/restore");
+}
+
+/*
+ * Enable SVE for EL1.
+ * Intended for use by the cpufeatures code during CPU boot.
+ */
+int sve_kernel_enable(void *__always_unused p)
+{
+	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
+	isb();	/* synchronise the CPACR_EL1 update before continuing */
+
+	return 0;	/* unconditional: there is no failure path here */
+}
+
+void __init sve_setup(void)
+{
+	u64 zcr;	/* sanitised ZCR_EL1 feature value */
+
+	if (!system_supports_sve())
+		return;
+
+	/*
+	 * The SVE architecture mandates support for 128-bit vectors,
+	 * so sve_vq_map must have at least SVE_VQ_MIN set.
+	 * If something went wrong, at least try to patch it up:
+	 */
+	if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
+		set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
+
+	zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+	sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
+
+	/*
+	 * Sanity-check that the max VL we determined through CPU features
+	 * corresponds properly to sve_vq_map.  If not, do our best:
+	 */
+	if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
+		sve_max_vl = find_supported_vector_length(sve_max_vl);
+
+	/*
+	 * For the default VL, pick the maximum supported value <= 64.
+	 * VL == 64 is guaranteed not to grow the signal frame.
+	 */
+	sve_default_vl = find_supported_vector_length(64);
+
+	pr_info("SVE: maximum available vector length %u bytes per vector\n",
+		sve_max_vl);
+	pr_info("SVE: default vector length %u bytes per vector\n",
+		sve_default_vl);
+
+	sve_efi_setup();	/* sizes its buffer from sve_max_vl set above */
+}
+
+/*
+ * Called from the put_task_struct() path, which cannot get here
+ * unless dead_task is really dead and not schedulable.
+ */
+void fpsimd_release_task(struct task_struct *dead_task)
+{
+	__sve_free(dead_task);	/* dead_task can no longer be scheduled */
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
+/*
+ * Trapped SVE access
+ *
+ * Storage is allocated for the full SVE state, the current FPSIMD
+ * register contents are migrated across, and TIF_SVE is set so that
+ * the SVE access trap will be disabled the next time this task
+ * reaches ret_to_user.
+ *
+ * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
+ * would have disabled the SVE access trap for userspace during
+ * ret_to_user, making an SVE access trap impossible in that case.
+ */
+asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+{
+	/* Even if we chose not to use SVE, the hardware could still trap: */
+	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+		return;
+	}
+
+	sve_alloc(current);	/* NOTE(review): confirm this cannot fail */
+
+	local_bh_disable();
+
+	task_fpsimd_save();
+	fpsimd_to_sve(current);	/* migrate the FPSIMD contents across */
+
+	/* Force ret_to_user to reload the registers: */
+	fpsimd_flush_task_state(current);
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
+
+	if (test_and_set_thread_flag(TIF_SVE))
+		WARN_ON(1); /* SVE access shouldn't have trapped */
+
+	local_bh_enable();
+}
+
 /*
  * Trapped FP/ASIMD access.
  */
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 {
 	/* TODO: implement lazy context saving/restoring */
 	WARN_ON(1);
@@ -111,7 +859,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 /*
  * Raise a SIGFPE for the current process.
  */
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 {
 	siginfo_t info;
 	unsigned int si_code = 0;
@@ -144,8 +892,8 @@ void fpsimd_thread_switch(struct task_struct *next)
 	 * the registers is in fact the most recent userland FPSIMD state of
 	 * 'current'.
 	 */
-	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
+	if (current->mm)
+		task_fpsimd_save();
 
 	if (next->mm) {
 		/*
@@ -159,16 +907,16 @@ void fpsimd_thread_switch(struct task_struct *next)
 
 		if (__this_cpu_read(fpsimd_last_state) == st
 		    && st->cpu == smp_processor_id())
-			clear_ti_thread_flag(task_thread_info(next),
-					     TIF_FOREIGN_FPSTATE);
+			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 		else
-			set_ti_thread_flag(task_thread_info(next),
-					   TIF_FOREIGN_FPSTATE);
+			set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 	}
 }
 
 void fpsimd_flush_thread(void)
 {
+	int vl, supported_vl;
+
 	if (!system_supports_fpsimd())
 		return;
 
@@ -176,6 +924,42 @@ void fpsimd_flush_thread(void)
 
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
 	fpsimd_flush_task_state(current);
+
+	if (system_supports_sve()) {
+		clear_thread_flag(TIF_SVE);
+		sve_free(current);
+
+		/*
+		 * Reset the task vector length as required.
+		 * This is where we ensure that all user tasks have a valid
+		 * vector length configured: no kernel task can become a user
+		 * task without an exec and hence a call to this function.
+		 * By the time the first call to this function is made, all
+		 * early hardware probing is complete, so sve_default_vl
+		 * should be valid.
+		 * If a bug causes this to go wrong, we make some noise and
+		 * try to fudge thread.sve_vl to a safe value here.
+		 */
+		vl = current->thread.sve_vl_onexec ?
+			current->thread.sve_vl_onexec : sve_default_vl;
+
+		if (WARN_ON(!sve_vl_valid(vl)))
+			vl = SVE_VL_MIN;
+
+		supported_vl = find_supported_vector_length(vl);
+		if (WARN_ON(supported_vl != vl))
+			vl = supported_vl;
+
+		current->thread.sve_vl = vl;
+
+		/*
+		 * If the task is not set to inherit, ensure that the vector
+		 * length will be reset by a subsequent exec:
+		 */
+		if (!test_thread_flag(TIF_SVE_VL_INHERIT))
+			current->thread.sve_vl_onexec = 0;
+	}
+
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 
 	local_bh_enable();
@@ -191,14 +975,23 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	local_bh_disable();
-
-	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
-
+	task_fpsimd_save();
 	local_bh_enable();
 }
 
 /*
+ * Like fpsimd_preserve_current_state(), but ensure that
+ * current->thread.fpsimd_state is updated so that it can be copied to
+ * the signal frame.
+ */
+void fpsimd_signal_preserve_current_state(void)
+{
+	fpsimd_preserve_current_state();
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+		sve_to_fpsimd(current);	/* SVE task: update fpsimd_state too */
+}
+
+/*
  * Load the userland FPSIMD state of 'current' from memory, but only if the
  * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
  * state of 'current'
@@ -213,7 +1006,7 @@ void fpsimd_restore_current_state(void)
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
 
-		fpsimd_load_state(st);
+		task_fpsimd_load();
 		__this_cpu_write(fpsimd_last_state, st);
 		st->cpu = smp_processor_id();
 	}
@@ -233,7 +1026,12 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
 	local_bh_disable();
 
-	fpsimd_load_state(state);
+	/* Take the caller's registers in all cases, leaving ->cpu untouched: */
+	current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+		fpsimd_to_sve(current);
+	task_fpsimd_load();
+
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
 		struct fpsimd_state *st = &current->thread.fpsimd_state;
 
@@ -252,6 +1050,33 @@ void fpsimd_flush_task_state(struct task_struct *t)
 	t->thread.fpsimd_state.cpu = NR_CPUS;
 }
 
+static inline void fpsimd_flush_cpu_state(void)
+{
+	__this_cpu_write(fpsimd_last_state, NULL); /* no task owns the regs */
+}
+
+/*
+ * Invalidate any task SVE state currently held in this CPU's regs.
+ *
+ * This is used to prevent the kernel from trying to reuse SVE register data
+ * that is destroyed by KVM guest enter/exit.  This function should go away when
+ * KVM SVE support is implemented.  Don't use it for anything else.
+ */
+#ifdef CONFIG_ARM64_SVE
+void sve_flush_cpu_state(void)
+{
+	struct fpsimd_state *const last = __this_cpu_read(fpsimd_last_state);
+	struct task_struct *owner;
+
+	/* No task state is recorded for this CPU: nothing to invalidate. */
+	if (last == NULL)
+		return;
+	owner = container_of(last, struct task_struct, thread.fpsimd_state);
+	if (test_tsk_thread_flag(owner, TIF_SVE))
+		fpsimd_flush_cpu_state();
+}
+#endif /* CONFIG_ARM64_SVE */
+
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 DEFINE_PER_CPU(bool, kernel_neon_busy);
@@ -286,11 +1111,13 @@ void kernel_neon_begin(void)
 	__this_cpu_write(kernel_neon_busy, true);
 
 	/* Save unsaved task fpsimd state, if any: */
-	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
-		fpsimd_save_state(&current->thread.fpsimd_state);
+	if (current->mm) {
+		task_fpsimd_save();
+		set_thread_flag(TIF_FOREIGN_FPSTATE);
+	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
-	__this_cpu_write(fpsimd_last_state, NULL);
+	fpsimd_flush_cpu_state();
 
 	preempt_disable();
 
@@ -325,6 +1152,7 @@ EXPORT_SYMBOL(kernel_neon_end);
 
 static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
+static DEFINE_PER_CPU(bool, efi_sve_state_used);
 
 /*
  * EFI runtime services support functions
@@ -350,10 +1178,24 @@ void __efi_fpsimd_begin(void)
 
 	WARN_ON(preemptible());
 
-	if (may_use_simd())
+	if (may_use_simd()) {
 		kernel_neon_begin();
-	else {
-		fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+	} else {
+		/*
+		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
+		 * preserving:
+		 */
+		if (system_supports_sve() && likely(efi_sve_state)) {
+			char *sve_state = this_cpu_ptr(efi_sve_state);
+
+			__this_cpu_write(efi_sve_state_used, true);
+
+			sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
+				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
+		} else {
+			fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+		}
+
 		__this_cpu_write(efi_fpsimd_state_used, true);
 	}
 }
@@ -366,10 +1208,22 @@ void __efi_fpsimd_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	if (__this_cpu_xchg(efi_fpsimd_state_used, false))
-		fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
-	else
+	if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
 		kernel_neon_end();
+	} else {
+		if (system_supports_sve() &&
+		    likely(__this_cpu_read(efi_sve_state_used))) {
+			char const *sve_state = this_cpu_ptr(efi_sve_state);
+
+			sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
+				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
+				       sve_vq_from_vl(sve_get_vl()) - 1);
+
+			__this_cpu_write(efi_sve_state_used, false);
+		} else {
+			fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+		}
+	}
 }
 
 #endif /* CONFIG_EFI */
@@ -382,9 +1236,9 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 {
 	switch (cmd) {
 	case CPU_PM_ENTER:
-		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
-			fpsimd_save_state(&current->thread.fpsimd_state);
-		this_cpu_write(fpsimd_last_state, NULL);
+		if (current->mm)
+			task_fpsimd_save();
+		fpsimd_flush_cpu_state();
 		break;
 	case CPU_PM_EXIT:
 		if (current->mm)
@@ -442,6 +1296,6 @@ static int __init fpsimd_init(void)
 	if (!(elf_hwcap & HWCAP_ASIMD))
 		pr_notice("Advanced SIMD is not implemented\n");
 
-	return 0;
+	return sve_sysctl_init();
 }
 core_initcall(fpsimd_init);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0b243ecaf7ac..67e86a0f57ac 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -480,14 +480,21 @@ set_hcr:
 
 	/* Statistical profiling */
 	ubfx	x0, x1, #32, #4			// Check ID_AA64DFR0_EL1 PMSVer
-	cbz	x0, 6f				// Skip if SPE not present
-	cbnz	x2, 5f				// VHE?
+	cbz	x0, 7f				// Skip if SPE not present
+	cbnz	x2, 6f				// VHE?
+	mrs_s	x4, SYS_PMBIDR_EL1		// If SPE available at EL2,
+	and	x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+	cbnz	x4, 5f				// then permit sampling of physical
+	mov	x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
+		      1 << SYS_PMSCR_EL2_PA_SHIFT)
+	msr_s	SYS_PMSCR_EL2, x4		// addresses and physical counter
+5:
 	mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
 	orr	x3, x3, x1			// If we don't have VHE, then
-	b	6f				// use EL1&0 translation.
-5:						// For VHE, use EL2 translation
+	b	7f				// use EL1&0 translation.
+6:						// For VHE, use EL2 translation
 	orr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
-6:
+7:
 	msr	mdcr_el2, x3			// Configure debug traps
 
 	/* Stage-2 translation */
@@ -517,8 +524,19 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	mov	x0, #0x33ff
 	msr	cptr_el2, x0			// Disable copro. traps to EL2
 
+	/* SVE register access */
+	mrs	x1, id_aa64pfr0_el1
+	ubfx	x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
+	cbz	x1, 7f
+
+	bic	x0, x0, #CPTR_EL2_TZ		// Also disable SVE traps
+	msr	cptr_el2, x0			// Disable copro. traps to EL2
+	isb
+	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
+	msr_s	SYS_ZCR_EL2, x1			// length for EL1.
+
 	/* Hypervisor stub */
-	adr_l	x0, __hyp_stub_vectors
+7:	adr_l	x0, __hyp_stub_vectors
 	msr	vbar_el2, x0
 
 	/* spsr */
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 095d3c170f5d..3009b8b80f08 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -27,6 +27,7 @@
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/daifflags.h>
 #include <asm/irqflags.h>
 #include <asm/kexec.h>
 #include <asm/memory.h>
@@ -285,7 +286,7 @@ int swsusp_arch_suspend(void)
 		return -EBUSY;
 	}
 
-	local_dbg_save(flags);
+	flags = local_daif_save();
 
 	if (__cpu_suspend_enter(&state)) {
 		/* make the crash dump kernel image visible/saveable */
@@ -315,7 +316,7 @@ int swsusp_arch_suspend(void)
 		__cpu_suspend_exit();
 	}
 
-	local_dbg_restore(flags);
+	local_daif_restore(flags);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 354be2a872ae..79b17384effa 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -25,8 +25,7 @@
  */
 void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
-	while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
-			 !IS_ALIGNED((unsigned long)to, 8))) {
+	while (count && !IS_ALIGNED((unsigned long)from, 8)) {
 		*(u8 *)to = __raw_readb(from);
 		from++;
 		to++;
@@ -54,23 +53,22 @@ EXPORT_SYMBOL(__memcpy_fromio);
  */
 void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
 {
-	while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
-			 !IS_ALIGNED((unsigned long)from, 8))) {
-		__raw_writeb(*(volatile u8 *)from, to);
+	while (count && !IS_ALIGNED((unsigned long)to, 8)) {
+		__raw_writeb(*(u8 *)from, to);
 		from++;
 		to++;
 		count--;
 	}
 
 	while (count >= 8) {
-		__raw_writeq(*(volatile u64 *)from, to);
+		__raw_writeq(*(u64 *)from, to);
 		from += 8;
 		to += 8;
 		count -= 8;
 	}
 
 	while (count) {
-		__raw_writeb(*(volatile u8 *)from, to);
+		__raw_writeb(*(u8 *)from, to);
 		from++;
 		to++;
 		count--;
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 11121f608eb5..f76ea92dff91 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/cpu_ops.h>
+#include <asm/daifflags.h>
 #include <asm/memory.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -195,8 +196,7 @@ void machine_kexec(struct kimage *kimage)
 
 	pr_info("Bye!\n");
 
-	/* Disable all DAIF exceptions. */
-	asm volatile ("msr daifset, #0xf" : : : "memory");
+	local_daif_mask();
 
 	/*
 	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2dc0f8482210..b2adcce7bc18 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -49,6 +49,7 @@
 #include <linux/notifier.h>
 #include <trace/events/power.h>
 #include <linux/percpu.h>
+#include <linux/thread_info.h>
 
 #include <asm/alternative.h>
 #include <asm/compat.h>
@@ -170,6 +171,39 @@ void machine_restart(char *cmd)
 	while (1);
 }
 
+static void print_pstate(struct pt_regs *regs)
+{
+	u64 pstate = regs->pstate;
+
+	if (!compat_user_mode(regs)) {	/* native: PSR_* bit layout */
+		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+			pstate,
+			pstate & PSR_N_BIT ? 'N' : 'n',
+			pstate & PSR_Z_BIT ? 'Z' : 'z',
+			pstate & PSR_C_BIT ? 'C' : 'c',
+			pstate & PSR_V_BIT ? 'V' : 'v',
+			pstate & PSR_D_BIT ? 'D' : 'd',
+			pstate & PSR_A_BIT ? 'A' : 'a',
+			pstate & PSR_I_BIT ? 'I' : 'i',
+			pstate & PSR_F_BIT ? 'F' : 'f',
+			pstate & PSR_PAN_BIT ? '+' : '-',
+			pstate & PSR_UAO_BIT ? '+' : '-');
+	} else {			/* compat: COMPAT_PSR_* bit layout */
+		printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
+			pstate,
+			pstate & COMPAT_PSR_N_BIT ? 'N' : 'n',
+			pstate & COMPAT_PSR_Z_BIT ? 'Z' : 'z',
+			pstate & COMPAT_PSR_C_BIT ? 'C' : 'c',
+			pstate & COMPAT_PSR_V_BIT ? 'V' : 'v',
+			pstate & COMPAT_PSR_Q_BIT ? 'Q' : 'q',
+			pstate & COMPAT_PSR_T_BIT ? "T32" : "A32",
+			pstate & COMPAT_PSR_E_BIT ? "BE" : "LE",
+			pstate & COMPAT_PSR_A_BIT ? 'A' : 'a',
+			pstate & COMPAT_PSR_I_BIT ? 'I' : 'i',
+			pstate & COMPAT_PSR_F_BIT ? 'F' : 'f');
+	}
+}
+
 void __show_regs(struct pt_regs *regs)
 {
 	int i, top_reg;
@@ -186,10 +220,9 @@ void __show_regs(struct pt_regs *regs)
 	}
 
 	show_regs_print_info(KERN_DEFAULT);
-	print_symbol("PC is at %s\n", instruction_pointer(regs));
-	print_symbol("LR is at %s\n", lr);
-	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
-	       regs->pc, lr, regs->pstate);
+	print_pstate(regs);
+	print_symbol("pc : %s\n", regs->pc);
+	print_symbol("lr : %s\n", lr);
 	printk("sp : %016llx\n", sp);
 
 	i = top_reg;
@@ -241,11 +274,27 @@ void release_thread(struct task_struct *dead_task)
 {
 }
 
+void arch_release_task_struct(struct task_struct *tsk)
+{
+	fpsimd_release_task(tsk);	/* arch hook: hand tsk to the FPSIMD code */
+}
+
+/*
+ * src and dst may temporarily have aliased sve_state after task_struct
+ * is copied.  We cannot fix this properly here, because src may have
+ * live SVE state and dst's thread_info may not exist yet, so tweaking
+ * either src's or dst's TIF_SVE is not safe.
+ *
+ * The unaliasing is done in copy_thread() instead.  This works because
+ * dst is not schedulable or traceable until both of these functions
+ * have been called.
+ */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	if (current->mm)
 		fpsimd_preserve_current_state();
 	*dst = *src;
+
 	return 0;
 }
 
@@ -258,6 +307,13 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
 	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
 
+	/*
+	 * Unalias p->thread.sve_state (if any) from the parent task
+	 * and discard any SVE state for p:
+	 */
+	clear_tsk_thread_flag(p, TIF_SVE);
+	p->thread.sve_state = NULL;
+
 	if (likely(!(p->flags & PF_KTHREAD))) {
 		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9cbb6123208f..7c44658b316d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/init.h>
 #include <linux/signal.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
@@ -40,6 +41,7 @@
 #include <linux/elf.h>
 
 #include <asm/compat.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
 #include <asm/stacktrace.h>
@@ -618,17 +620,56 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 /*
  * TODO: update fp accessors for lazy context switching (sync/flush hwstate)
  */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   void *kbuf, void __user *ubuf)
+static int __fpr_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf, unsigned int start_pos)
 {
 	struct user_fpsimd_state *uregs;
+
+	sve_sync_to_fpsimd(target);
+
 	uregs = &target->thread.fpsimd_state.user_fpsimd;
 
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+				   start_pos, start_pos + sizeof(*uregs));
+}
+
+static int fpr_get(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
 	if (target == current)
 		fpsimd_preserve_current_state();
 
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+	return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
+static int __fpr_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned int start_pos)
+{
+	struct user_fpsimd_state staged;
+	int err;
+
+	/*
+	 * Bring target->thread.fpsimd_state up to date before taking the
+	 * snapshot below, so that a short copyin, which leaves part of the
+	 * snapshot unwritten, can't resurrect stale data.
+	 */
+	sve_sync_to_fpsimd(target);
+	staged = target->thread.fpsimd_state.user_fpsimd;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &staged,
+				 start_pos, start_pos + sizeof(staged));
+
+	/* Commit the staged copy only if the copyin succeeded: */
+	if (!err)
+		target->thread.fpsimd_state.user_fpsimd = staged;
+
+	return err;
 }
 
 static int fpr_set(struct task_struct *target, const struct user_regset *regset,
@@ -636,15 +677,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		   const void *kbuf, const void __user *ubuf)
 {
 	int ret;
-	struct user_fpsimd_state newstate =
-		target->thread.fpsimd_state.user_fpsimd;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+	ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
 	if (ret)
 		return ret;
 
-	target->thread.fpsimd_state.user_fpsimd = newstate;
+	sve_sync_from_fpsimd_zeropad(target);
 	fpsimd_flush_task_state(target);
+
 	return ret;
 }
 
@@ -702,6 +742,215 @@ static int system_call_set(struct task_struct *target,
 	return ret;
 }
 
+#ifdef CONFIG_ARM64_SVE
+
+static void sve_init_header_from_task(struct user_sve_header *header,
+				      struct task_struct *target)
+{
+	unsigned int vq;
+
+	memset(header, 0, sizeof(*header));
+	header->flags = SVE_PT_REGS_FPSIMD;
+	if (test_tsk_thread_flag(target, TIF_SVE))
+		header->flags = SVE_PT_REGS_SVE;
+	if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+		header->flags |= SVE_PT_VL_INHERIT;
+
+	header->vl = target->thread.sve_vl;
+	vq = sve_vq_from_vl(header->vl);
+
+	/* If sve_max_vl is invalid, warn and report the task's VL instead: */
+	header->max_vl = WARN_ON(!sve_vl_valid(sve_max_vl)) ?
+		header->vl : sve_max_vl;
+
+	header->size = SVE_PT_SIZE(vq, header->flags);
+	header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
+				      SVE_PT_REGS_SVE);
+}
+
+static unsigned int sve_size_from_header(struct user_sve_header const *header)
+{
+	return ALIGN(header->size, SVE_VQ_BYTES); /* round up to SVE_VQ_BYTES */
+}
+
+static unsigned int sve_get_size(struct task_struct *target,
+				 const struct user_regset *regset)
+{
+	struct user_sve_header header;
+
+	if (!system_supports_sve())
+		return 0;	/* no SVE: report a zero-sized regset */
+
+	sve_init_header_from_task(&header, target);
+	return sve_size_from_header(&header);
+}
+
+static int sve_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;	/* regset byte range of each piece */
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	sve_init_header_from_task(&header, target);
+	vq = sve_vq_from_vl(header.vl);
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
+				  0, sizeof(header));
+	if (ret)
+		return ret;
+
+	if (target == current)
+		fpsimd_preserve_current_state();
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+		return __fpr_get(target, regset, pos, count, kbuf, ubuf,
+				 SVE_PT_FPSIMD_OFFSET);
+
+	/* Otherwise: full SVE case */
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  target->thread.sve_state,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);	/* gap before fpsr: zeros */
+	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+				       start, end);
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpsimd_state.fpsr,
+				  start, end);
+	if (ret)
+		return ret;
+
+	start = end;
+	end = sve_size_from_header(&header);	/* zero-fill up to full size */
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					start, end);
+}
+
+static int sve_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct user_sve_header header;
+	unsigned int vq;
+	unsigned long start, end;	/* regset byte range of each piece */
+
+	if (!system_supports_sve())
+		return -EINVAL;
+
+	/* Header */
+	if (count < sizeof(header))
+		return -EINVAL;	/* a complete header is mandatory */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+				 0, sizeof(header));
+	if (ret)
+		goto out;
+
+	/*
+	 * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by
+	 * sve_set_vector_length(), which will also validate them for us:
+	 */
+	ret = sve_set_vector_length(target, header.vl,
+		((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+	if (ret)
+		goto out;
+
+	/* Actual VL set may be less than the user asked for: */
+	vq = sve_vq_from_vl(target->thread.sve_vl);
+
+	/* Registers: FPSIMD-only case */
+
+	BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+	if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
+		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+				SVE_PT_FPSIMD_OFFSET);
+		/* NOTE: TIF_SVE is cleared even when __fpr_set() failed */
+		clear_tsk_thread_flag(target, TIF_SVE);
+		goto out;
+	}
+
+	/* Otherwise: full SVE case */
+
+	/*
+	 * If setting a different VL from the requested VL and there is
+	 * register data, the data layout will be wrong: don't even
+	 * try to set the registers in this case.
+	 */
+	if (count && vq != sve_vq_from_vl(header.vl)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	sve_alloc(target);
+
+	/*
+	 * Ensure target->thread.sve_state is up to date with target's
+	 * FPSIMD regs, so that a short copyin leaves trailing registers
+	 * unmodified.
+	 */
+	fpsimd_sync_to_sve(target);
+	set_tsk_thread_flag(target, TIF_SVE);
+
+	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+	start = SVE_PT_SVE_OFFSET;
+	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 target->thread.sve_state,
+				 start, end);
+	if (ret)
+		goto out;
+
+	start = end;
+	end = SVE_PT_SVE_FPSR_OFFSET(vq);	/* gap before fpsr: ignored */
+	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					start, end);
+	if (ret)
+		goto out;
+
+	/*
+	 * Copy fpsr, and fpcr which must follow contiguously in
+	 * struct fpsimd_state:
+	 */
+	start = end;
+	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.fpsimd_state.fpsr,
+				 start, end);
+
+out:
+	/* Reached on every exit once the header length check has passed: */
+	fpsimd_flush_task_state(target);
+	return ret;
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -711,6 +960,9 @@ enum aarch64_regset {
 	REGSET_HW_WATCH,
 #endif
 	REGSET_SYSTEM_CALL,
+#ifdef CONFIG_ARM64_SVE
+	REGSET_SVE,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -768,6 +1020,18 @@ static const struct user_regset aarch64_regsets[] = {
 		.get = system_call_get,
 		.set = system_call_set,
 	},
+#ifdef CONFIG_ARM64_SVE
+	[REGSET_SVE] = { /* Scalable Vector Extension */
+		.core_note_type = NT_ARM_SVE,
+		.n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+				  SVE_VQ_BYTES),
+		.size = SVE_VQ_BYTES,
+		.align = SVE_VQ_BYTES,
+		.get = sve_get,
+		.set = sve_set,
+		.get_size = sve_get_size,
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index d4b740538ad5..30ad2f085d1f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -23,7 +23,6 @@
 #include <linux/stddef.h>
 #include <linux/ioport.h>
 #include <linux/delay.h>
-#include <linux/utsname.h>
 #include <linux/initrd.h>
 #include <linux/console.h>
 #include <linux/cache.h>
@@ -48,6 +47,7 @@
 #include <asm/fixmap.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
+#include <asm/daifflags.h>
 #include <asm/elf.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
@@ -103,7 +103,8 @@ void __init smp_setup_processor_id(void)
 	 * access percpu variable inside lock_release
 	 */
 	set_my_cpu_offset(0);
-	pr_info("Booting Linux on physical CPU 0x%lx\n", (unsigned long)mpidr);
+	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
+		(unsigned long)mpidr, read_cpuid_id());
 }
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
@@ -244,9 +245,6 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)
 {
-	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
-
-	sprintf(init_utsname()->machine, UTS_MACHINE);
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
@@ -262,10 +260,11 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 	/*
-	 *  Unmask asynchronous aborts after bringing up possible earlycon.
-	 * (Report possible System Errors once we can report this occurred)
+	 * Unmask asynchronous aborts and fiq after bringing up possible
+	 * earlycon. (Report possible System Errors once we can report this
+	 * occurred).
 	 */
-	local_async_enable();
+	local_daif_restore(DAIF_PROCCTX_NOIRQ);
 
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0bdc96c61bc0..b120111a46be 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -31,6 +31,7 @@
 #include <linux/ratelimit.h>
 #include <linux/syscalls.h>
 
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -63,6 +64,7 @@ struct rt_sigframe_user_layout {
 
 	unsigned long fpsimd_offset;
 	unsigned long esr_offset;
+	unsigned long sve_offset;
 	unsigned long extra_offset;
 	unsigned long end_offset;
 };
@@ -179,9 +181,6 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
 	int err;
 
-	/* dump the hardware registers to the fpsimd_state structure */
-	fpsimd_preserve_current_state();
-
 	/* copy the FP and status/control registers */
 	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
 	__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
@@ -214,6 +213,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
 	__get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
 	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
 
+	clear_thread_flag(TIF_SVE);
+
 	/* load the hardware registers from the fpsimd_state structure */
 	if (!err)
 		fpsimd_update_current_state(&fpsimd);
@@ -221,10 +222,118 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
 	return err ? -EFAULT : 0;
 }
 
+
 struct user_ctxs {
 	struct fpsimd_context __user *fpsimd;
+	struct sve_context __user *sve;
 };
 
+#ifdef CONFIG_ARM64_SVE
+
+static int preserve_sve_context(struct sve_context __user *ctx)
+{
+	int err = 0;
+	u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+	unsigned int vl = current->thread.sve_vl;
+	unsigned int vq = 0;
+
+	if (test_thread_flag(TIF_SVE))
+		vq = sve_vq_from_vl(vl);
+
+	memset(reserved, 0, sizeof(reserved));
+
+	__put_user_error(SVE_MAGIC, &ctx->head.magic, err);
+	__put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
+			 &ctx->head.size, err);
+	__put_user_error(vl, &ctx->vl, err);
+	BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+	err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+	if (vq) {
+		/*
+		 * This assumes that the SVE state has already been saved to
+		 * the task struct by calling preserve_fpsimd_context().
+		 */
+		err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
+				      current->thread.sve_state,
+				      SVE_SIG_REGS_SIZE(vq));
+	}
+
+	return err ? -EFAULT : 0;
+}
+
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+	int err;
+	unsigned int vq;
+	struct fpsimd_state fpsimd;
+	struct sve_context sve;
+
+	if (__copy_from_user(&sve, user->sve, sizeof(sve)))
+		return -EFAULT;
+
+	if (sve.vl != current->thread.sve_vl)
+		return -EINVAL;
+
+	if (sve.head.size <= sizeof(*user->sve)) {
+		clear_thread_flag(TIF_SVE);
+		goto fpsimd_only;
+	}
+
+	vq = sve_vq_from_vl(sve.vl);
+
+	if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+		return -EINVAL;
+
+	/*
+	 * Careful: we are about to __copy_from_user() directly into
+	 * thread.sve_state with preemption enabled, so protection is
+	 * needed to prevent a racing context switch from writing stale
+	 * registers back over the new data.
+	 */
+
+	fpsimd_flush_task_state(current);
+	barrier();
+	/* From now, fpsimd_thread_switch() won't clear TIF_FOREIGN_FPSTATE */
+
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
+	barrier();
+	/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
+
+	sve_alloc(current);
+	err = __copy_from_user(current->thread.sve_state,
+			       (char __user const *)user->sve +
+					SVE_SIG_REGS_OFFSET,
+			       SVE_SIG_REGS_SIZE(vq));
+	if (err)
+		return -EFAULT;
+
+	set_thread_flag(TIF_SVE);
+
+fpsimd_only:
+	/* copy the FP and status/control registers */
+	/* restore_sigframe() already checked that user->fpsimd != NULL. */
+	err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
+			       sizeof(fpsimd.vregs));
+	__get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
+	__get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+
+	/* load the hardware registers from the fpsimd_state structure */
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
+
+	return err ? -EFAULT : 0;
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Turn any uses of these that are not optimised out into a link error: */
+extern int preserve_sve_context(void __user *ctx);
+extern int restore_sve_fpsimd_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+
 static int parse_user_sigframe(struct user_ctxs *user,
 			       struct rt_sigframe __user *sf)
 {
@@ -237,6 +346,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
 	char const __user *const sfp = (char const __user *)sf;
 
 	user->fpsimd = NULL;
+	user->sve = NULL;
 
 	if (!IS_ALIGNED((unsigned long)base, 16))
 		goto invalid;
@@ -287,6 +397,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
 			/* ignore */
 			break;
 
+		case SVE_MAGIC:
+			if (!system_supports_sve())
+				goto invalid;
+
+			if (user->sve)
+				goto invalid;
+
+			if (size < sizeof(*user->sve))
+				goto invalid;
+
+			user->sve = (struct sve_context __user *)head;
+			break;
+
 		case EXTRA_MAGIC:
 			if (have_extra_context)
 				goto invalid;
@@ -343,6 +466,10 @@ static int parse_user_sigframe(struct user_ctxs *user,
 			 */
 			offset = 0;
 			limit = extra_size;
+
+			if (!access_ok(VERIFY_READ, base, limit))
+				goto invalid;
+
 			continue;
 
 		default:
@@ -359,9 +486,6 @@ static int parse_user_sigframe(struct user_ctxs *user,
 	}
 
 done:
-	if (!user->fpsimd)
-		goto invalid;
-
 	return 0;
 
 invalid:
@@ -395,8 +519,19 @@ static int restore_sigframe(struct pt_regs *regs,
 	if (err == 0)
 		err = parse_user_sigframe(&user, sf);
 
-	if (err == 0)
-		err = restore_fpsimd_context(user.fpsimd);
+	if (err == 0) {
+		if (!user.fpsimd)
+			return -EINVAL;
+
+		if (user.sve) {
+			if (!system_supports_sve())
+				return -EINVAL;
+
+			err = restore_sve_fpsimd_context(&user);
+		} else {
+			err = restore_fpsimd_context(user.fpsimd);
+		}
+	}
 
 	return err;
 }
@@ -455,6 +590,18 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
 			return err;
 	}
 
+	if (system_supports_sve()) {
+		unsigned int vq = 0;
+
+		if (test_thread_flag(TIF_SVE))
+			vq = sve_vq_from_vl(current->thread.sve_vl);
+
+		err = sigframe_alloc(user, &user->sve_offset,
+				     SVE_SIG_CONTEXT_SIZE(vq));
+		if (err)
+			return err;
+	}
+
 	return sigframe_alloc_end(user);
 }
 
@@ -496,6 +643,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
 		__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
 	}
 
+	/* Scalable Vector Extension state, if present */
+	if (system_supports_sve() && err == 0 && user->sve_offset) {
+		struct sve_context __user *sve_ctx =
+			apply_user_offset(user, user->sve_offset);
+		err |= preserve_sve_context(sve_ctx);
+	}
+
 	if (err == 0 && user->extra_offset) {
 		char __user *sfp = (char __user *)user->sigframe;
 		char __user *userp =
@@ -595,6 +749,8 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
 	struct rt_sigframe __user *frame;
 	int err = 0;
 
+	fpsimd_signal_preserve_current_state();
+
 	if (get_sigframe(&user, ksig, regs))
 		return 1;
 
@@ -756,9 +912,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 		addr_limit_user_check();
 
 		if (thread_flags & _TIF_NEED_RESCHED) {
+			/* Unmask Debug and SError for the next task */
+			local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
 			schedule();
 		} else {
-			local_irq_enable();
+			local_daif_restore(DAIF_PROCCTX);
 
 			if (thread_flags & _TIF_UPROBE)
 				uprobe_notify_resume(regs);
@@ -775,7 +934,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 				fpsimd_restore_current_state();
 		}
 
-		local_irq_disable();
+		local_daif_mask();
 		thread_flags = READ_ONCE(current_thread_info()->flags);
 	} while (thread_flags & _TIF_WORK_MASK);
 }
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index e09bf5d15606..22711ee8e36c 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -239,7 +239,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	 * Note that this also saves V16-31, which aren't visible
 	 * in AArch32.
 	 */
-	fpsimd_preserve_current_state();
+	fpsimd_signal_preserve_current_state();
 
 	/* Place structure header on the stack */
 	__put_user_error(magic, &frame->magic, err);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9f7195a5773e..551eb07c53b6 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -47,6 +47,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
+#include <asm/daifflags.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
 #include <asm/pgtable.h>
@@ -216,6 +217,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
  */
 asmlinkage void secondary_start_kernel(void)
 {
+	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu;
 
@@ -265,14 +267,14 @@ asmlinkage void secondary_start_kernel(void)
 	 * the CPU migration code to notice that the CPU is online
 	 * before we continue.
 	 */
-	pr_info("CPU%u: Booted secondary processor [%08x]\n",
-					 cpu, read_cpuid_id());
+	pr_info("CPU%u: Booted secondary processor 0x%010lx [0x%08x]\n",
+					 cpu, (unsigned long)mpidr,
+					 read_cpuid_id());
 	update_cpu_boot_status(CPU_BOOT_SUCCESS);
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
-	local_irq_enable();
-	local_async_enable();
+	local_daif_restore(DAIF_PROCCTX);
 
 	/*
 	 * OK, it's off to the idle thread for us
@@ -368,10 +370,6 @@ void __cpu_die(unsigned int cpu)
 /*
  * Called from the idle thread for the CPU which has been shutdown.
  *
- * Note that we disable IRQs here, but do not re-enable them
- * before returning to the caller. This is also the behaviour
- * of the other hotplug-cpu capable cores, so presumably coming
- * out of idle fixes this.
  */
 void cpu_die(void)
 {
@@ -379,7 +377,7 @@ void cpu_die(void)
 
 	idle_task_exit();
 
-	local_irq_disable();
+	local_daif_mask();
 
 	/* Tell __cpu_die() that this CPU is now safe to dispose of */
 	(void)cpu_report_death();
@@ -837,7 +835,7 @@ static void ipi_cpu_stop(unsigned int cpu)
 {
 	set_cpu_online(cpu, false);
 
-	local_irq_disable();
+	local_daif_mask();
 
 	while (1)
 		cpu_relax();
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 77cd655e6eb7..3fe5ad884418 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -5,6 +5,7 @@
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/exec.h>
 #include <asm/pgtable.h>
@@ -12,7 +13,6 @@
 #include <asm/mmu_context.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
-#include <asm/tlbflush.h>
 
 /*
  * This is allocated by cpu_suspend_init(), and used to store a pointer to
@@ -58,7 +58,7 @@ void notrace __cpu_suspend_exit(void)
 	/*
 	 * Restore HW breakpoint registers to sane values
 	 * before debug exceptions are possibly reenabled
-	 * through local_dbg_restore.
+	 * by cpu_suspend()'s local_daif_restore() call.
 	 */
 	if (hw_breakpoint_restore)
 		hw_breakpoint_restore(cpu);
@@ -82,7 +82,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	 * updates to mdscr register (saved and restored along with
 	 * general purpose registers) from kernel debuggers.
 	 */
-	local_dbg_save(flags);
+	flags = local_daif_save();
 
 	/*
 	 * Function graph tracer state gets incosistent when the kernel
@@ -115,7 +115,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	 * restored, so from this point onwards, debugging is fully
 	 * renabled if it was enabled when core started shutdown.
 	 */
-	local_dbg_restore(flags);
+	local_daif_restore(flags);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8383af15a759..3d3588fcd1c7 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
 
 #include <asm/atomic.h>
 #include <asm/bug.h>
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
@@ -58,55 +59,9 @@ static const char *handler[]= {
 
 int show_unhandled_signals = 1;
 
-/*
- * Dump out the contents of some kernel memory nicely...
- */
-static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
-		     unsigned long top)
-{
-	unsigned long first;
-	mm_segment_t fs;
-	int i;
-
-	/*
-	 * We need to switch to kernel mode so that we can use __get_user
-	 * to safely read from kernel space.
-	 */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top);
-
-	for (first = bottom & ~31; first < top; first += 32) {
-		unsigned long p;
-		char str[sizeof(" 12345678") * 8 + 1];
-
-		memset(str, ' ', sizeof(str));
-		str[sizeof(str) - 1] = '\0';
-
-		for (p = first, i = 0; i < (32 / 8)
-					&& p < top; i++, p += 8) {
-			if (p >= bottom && p < top) {
-				unsigned long val;
-
-				if (__get_user(val, (unsigned long *)p) == 0)
-					sprintf(str + i * 17, " %016lx", val);
-				else
-					sprintf(str + i * 17, " ????????????????");
-			}
-		}
-		printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
-	}
-
-	set_fs(fs);
-}
-
 static void dump_backtrace_entry(unsigned long where)
 {
-	/*
-	 * Note that 'where' can have a physical address, but it's not handled.
-	 */
-	print_ip_sym(where);
+	printk(" %pS\n", (void *)where);
 }
 
 static void __dump_instr(const char *lvl, struct pt_regs *regs)
@@ -171,10 +126,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 
 	skip = !!regs;
 	printk("Call trace:\n");
-	while (1) {
-		unsigned long stack;
-		int ret;
-
+	do {
 		/* skip until specified stack frame */
 		if (!skip) {
 			dump_backtrace_entry(frame.pc);
@@ -189,17 +141,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 			 */
 			dump_backtrace_entry(regs->pc);
 		}
-		ret = unwind_frame(tsk, &frame);
-		if (ret < 0)
-			break;
-		if (in_entry_text(frame.pc)) {
-			stack = frame.fp - offsetof(struct pt_regs, stackframe);
-
-			if (on_accessible_stack(tsk, stack))
-				dump_mem("", "Exception stack", stack,
-					 stack + sizeof(struct pt_regs));
-		}
-	}
+	} while (!unwind_frame(tsk, &frame));
 
 	put_task_stack(tsk);
 }
@@ -293,6 +235,17 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
 	}
 }
 
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
+{
+	regs->pc += size;
+
+	/*
+	 * If we were single stepping, we want to get the step exception after
+	 * we return from the trap.
+	 */
+	user_fastforward_single_step(current);
+}
+
 static LIST_HEAD(undef_hook);
 static DEFINE_RAW_SPINLOCK(undef_lock);
 
@@ -358,8 +311,8 @@ exit:
 	return fn ? fn(regs, instr) : 1;
 }
 
-static void force_signal_inject(int signal, int code, struct pt_regs *regs,
-				unsigned long address)
+void force_signal_inject(int signal, int code, struct pt_regs *regs,
+			 unsigned long address)
 {
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
@@ -373,7 +326,7 @@ static void force_signal_inject(int signal, int code, struct pt_regs *regs,
 		desc = "illegal memory access";
 		break;
 	default:
-		desc = "bad mode";
+		desc = "unknown or unrecoverable error";
 		break;
 	}
 
@@ -480,7 +433,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 	if (ret)
 		arm64_notify_segfault(regs, address);
 	else
-		regs->pc += 4;
+		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
 static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -490,7 +443,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
 
 	pt_regs_write_reg(regs, rt, val);
 
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
 static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -498,7 +451,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
 	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
 
 	pt_regs_write_reg(regs, rt, arch_counter_get_cntvct());
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
 static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
@@ -506,7 +459,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
 	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
 
 	pt_regs_write_reg(regs, rt, arch_timer_get_rate());
-	regs->pc += 4;
+	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
 
 struct sys64_hook {
@@ -603,6 +556,7 @@ static const char *esr_class_str[] = {
 	[ESR_ELx_EC_HVC64]		= "HVC (AArch64)",
 	[ESR_ELx_EC_SMC64]		= "SMC (AArch64)",
 	[ESR_ELx_EC_SYS64]		= "MSR/MRS (AArch64)",
+	[ESR_ELx_EC_SVE]		= "SVE",
 	[ESR_ELx_EC_IMP_DEF]		= "EL3 IMP DEF",
 	[ESR_ELx_EC_IABT_LOW]		= "IABT (lower EL)",
 	[ESR_ELx_EC_IABT_CUR]		= "IABT (current EL)",
@@ -642,7 +596,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 		esr_get_class_string(esr));
 
 	die("Oops - bad mode", regs, 0);
-	local_irq_disable();
+	local_daif_mask();
 	panic("bad mode");
 }
 
@@ -708,6 +662,19 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 }
 #endif
 
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+	nmi_enter();
+
+	console_verbose();
+
+	pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+		smp_processor_id(), esr, esr_get_class_string(esr));
+	__show_regs(regs);
+
+	panic("Asynchronous SError Interrupt");
+}
+
 void __pte_error(const char *file, int line, unsigned long val)
 {
 	pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
@@ -761,7 +728,7 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
 	}
 
 	/* If thread survives, skip over the BUG instruction and continue: */
-	regs->pc += AARCH64_INSN_SIZE;	/* skip BRK and resume */
+	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 	return DBG_HOOK_HANDLED;
 }
 
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 76320e920965..c39872a7b03c 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -309,7 +309,7 @@ ENTRY(__kernel_clock_getres)
 	b.ne	4f
 	ldr	x2, 6f
 2:
-	cbz	w1, 3f
+	cbz	x1, 3f
 	stp	xzr, x2, [x1]
 
 3:	/* res == NULL. */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7debb74843a0..b71247995469 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -147,6 +147,13 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* Until SVE is supported for guests: */
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
@@ -160,6 +167,7 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_ELx_EC_HVC64]	= handle_hvc,
 	[ESR_ELx_EC_SMC64]	= handle_smc,
 	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
+	[ESR_ELx_EC_SVE]	= handle_sve,
 	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
 	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
 	[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index f5154ed3da6c..321c9c05dd9e 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -65,16 +65,6 @@
 	default:	write_debug(ptr[0], reg, 0);			\
 	}
 
-#define PMSCR_EL1		sys_reg(3, 0, 9, 9, 0)
-
-#define PMBLIMITR_EL1		sys_reg(3, 0, 9, 10, 0)
-#define PMBLIMITR_EL1_E		BIT(0)
-
-#define PMBIDR_EL1		sys_reg(3, 0, 9, 10, 7)
-#define PMBIDR_EL1_P		BIT(4)
-
-#define psb_csync()		asm volatile("hint #17")
-
 static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
 {
 	/* The vcpu can run. but it can't hide. */
@@ -90,18 +80,18 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 		return;
 
 	/* Yes; is it owned by EL3? */
-	reg = read_sysreg_s(PMBIDR_EL1);
-	if (reg & PMBIDR_EL1_P)
+	reg = read_sysreg_s(SYS_PMBIDR_EL1);
+	if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
 		return;
 
 	/* No; is the host actually using the thing? */
-	reg = read_sysreg_s(PMBLIMITR_EL1);
-	if (!(reg & PMBLIMITR_EL1_E))
+	reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
+	if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
 		return;
 
 	/* Yes; save the control register and disable data generation */
-	*pmscr_el1 = read_sysreg_s(PMSCR_EL1);
-	write_sysreg_s(0, PMSCR_EL1);
+	*pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
+	write_sysreg_s(0, SYS_PMSCR_EL1);
 	isb();
 
 	/* Now drain all buffered data to memory */
@@ -122,7 +112,7 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
 	isb();
 
 	/* Re-enable data generation */
-	write_sysreg_s(pmscr_el1, PMSCR_EL1);
+	write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
 void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 945e79c641c4..951f3ebaff26 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -48,7 +48,7 @@ static void __hyp_text __activate_traps_vhe(void)
 
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
-	val &= ~CPACR_EL1_FPEN;
+	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
 	write_sysreg(val, cpacr_el1);
 
 	write_sysreg(__kvm_hyp_vector, vbar_el1);
@@ -59,7 +59,7 @@ static void __hyp_text __activate_traps_nvhe(void)
 	u64 val;
 
 	val = CPTR_EL2_DEFAULT;
-	val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
+	val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
 	write_sysreg(val, cptr_el2);
 }
 
@@ -81,11 +81,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	 * it will cause an exception.
 	 */
 	val = vcpu->arch.hcr_el2;
+
 	if (!(val & HCR_RW) && system_supports_fpsimd()) {
 		write_sysreg(1 << 30, fpexc32_el2);
 		isb();
 	}
+
+	if (val & HCR_RW) /* for AArch64 only: */
+		val |= HCR_TID3; /* TID3: trap feature register accesses */
+
 	write_sysreg(val, hcr_el2);
+
 	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
 	/*
@@ -111,7 +117,7 @@ static void __hyp_text __deactivate_traps_vhe(void)
 
 	write_sysreg(mdcr_el2, mdcr_el2);
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-	write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
+	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
 	write_sysreg(vectors, vbar_el1);
 }
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e070d3baf9f..a0ee9b05e3d4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -23,6 +23,7 @@
 #include <linux/bsearch.h>
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
+#include <linux/printk.h>
 #include <linux/uaccess.h>
 
 #include <asm/cacheflush.h>
@@ -892,6 +893,146 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/* Read a sanitised cpufeature ID register by sys_reg_desc */
+static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
+{
+	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+	u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
+
+	if (id == SYS_ID_AA64PFR0_EL1) {
+		if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
+			pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n",
+				    task_pid_nr(current));
+
+		val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+	}
+
+	return val;
+}
+
+/* cpufeature ID register access trap handlers */
+
+static bool __access_id_reg(struct kvm_vcpu *vcpu,
+			    struct sys_reg_params *p,
+			    const struct sys_reg_desc *r,
+			    bool raz)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
+
+	p->regval = read_id_reg(r, raz);
+	return true;
+}
+
+static bool access_id_reg(struct kvm_vcpu *vcpu,
+			  struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	return __access_id_reg(vcpu, p, r, false);
+}
+
+static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
+			      struct sys_reg_params *p,
+			      const struct sys_reg_desc *r)
+{
+	return __access_id_reg(vcpu, p, r, true);
+}
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+
+/*
+ * cpufeature ID register user accessors
+ *
+ * For now, these registers are immutable for userspace, so no values
+ * are stored, and for set_id_reg() we don't allow the effective value
+ * to be changed.
+ */
+static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+			bool raz)
+{
+	const u64 id = sys_reg_to_index(rd);
+	const u64 val = read_id_reg(rd, raz);
+
+	return reg_to_user(uaddr, &val, id);
+}
+
+static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+			bool raz)
+{
+	const u64 id = sys_reg_to_index(rd);
+	int err;
+	u64 val;
+
+	err = reg_from_user(&val, uaddr, id);
+	if (err)
+		return err;
+
+	/* This is what we mean by invariant: you can't change it. */
+	if (val != read_id_reg(rd, raz))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		      const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	return __get_id_reg(rd, uaddr, false);
+}
+
+static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		      const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	return __set_id_reg(rd, uaddr, false);
+}
+
+static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+			  const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	return __get_id_reg(rd, uaddr, true);
+}
+
+static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+			  const struct kvm_one_reg *reg, void __user *uaddr)
+{
+	return __set_id_reg(rd, uaddr, true);
+}
+
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define ID_SANITISED(name) {			\
+	SYS_DESC(SYS_##name),			\
+	.access	= access_id_reg,		\
+	.get_user = get_id_reg,			\
+	.set_user = set_id_reg,			\
+}
+
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) {			\
+	Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2),	\
+	.access = access_raz_id_reg,			\
+	.get_user = get_raz_id_reg,			\
+	.set_user = set_raz_id_reg,			\
+}
+
+/*
+ * sys_reg_desc initialiser for known ID registers that we hide from guests.
+ * For now, these are exposed just like unallocated ID regs: they appear
+ * RAZ for the guest.
+ */
+#define ID_HIDDEN(name) {			\
+	SYS_DESC(SYS_##name),			\
+	.access = access_raz_id_reg,		\
+	.get_user = get_raz_id_reg,		\
+	.set_user = set_raz_id_reg,		\
+}
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -944,6 +1085,84 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
 
 	{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
+
+	/*
+	 * ID regs: all ID_SANITISED() entries here must have corresponding
+	 * entries in arm64_ftr_regs[].
+	 */
+
+	/* AArch64 mappings of the AArch32 ID registers */
+	/* CRm=1 */
+	ID_SANITISED(ID_PFR0_EL1),
+	ID_SANITISED(ID_PFR1_EL1),
+	ID_SANITISED(ID_DFR0_EL1),
+	ID_HIDDEN(ID_AFR0_EL1),
+	ID_SANITISED(ID_MMFR0_EL1),
+	ID_SANITISED(ID_MMFR1_EL1),
+	ID_SANITISED(ID_MMFR2_EL1),
+	ID_SANITISED(ID_MMFR3_EL1),
+
+	/* CRm=2 */
+	ID_SANITISED(ID_ISAR0_EL1),
+	ID_SANITISED(ID_ISAR1_EL1),
+	ID_SANITISED(ID_ISAR2_EL1),
+	ID_SANITISED(ID_ISAR3_EL1),
+	ID_SANITISED(ID_ISAR4_EL1),
+	ID_SANITISED(ID_ISAR5_EL1),
+	ID_SANITISED(ID_MMFR4_EL1),
+	ID_UNALLOCATED(2,7),
+
+	/* CRm=3 */
+	ID_SANITISED(MVFR0_EL1),
+	ID_SANITISED(MVFR1_EL1),
+	ID_SANITISED(MVFR2_EL1),
+	ID_UNALLOCATED(3,3),
+	ID_UNALLOCATED(3,4),
+	ID_UNALLOCATED(3,5),
+	ID_UNALLOCATED(3,6),
+	ID_UNALLOCATED(3,7),
+
+	/* AArch64 ID registers */
+	/* CRm=4 */
+	ID_SANITISED(ID_AA64PFR0_EL1),
+	ID_SANITISED(ID_AA64PFR1_EL1),
+	ID_UNALLOCATED(4,2),
+	ID_UNALLOCATED(4,3),
+	ID_UNALLOCATED(4,4),
+	ID_UNALLOCATED(4,5),
+	ID_UNALLOCATED(4,6),
+	ID_UNALLOCATED(4,7),
+
+	/* CRm=5 */
+	ID_SANITISED(ID_AA64DFR0_EL1),
+	ID_SANITISED(ID_AA64DFR1_EL1),
+	ID_UNALLOCATED(5,2),
+	ID_UNALLOCATED(5,3),
+	ID_HIDDEN(ID_AA64AFR0_EL1),
+	ID_HIDDEN(ID_AA64AFR1_EL1),
+	ID_UNALLOCATED(5,6),
+	ID_UNALLOCATED(5,7),
+
+	/* CRm=6 */
+	ID_SANITISED(ID_AA64ISAR0_EL1),
+	ID_SANITISED(ID_AA64ISAR1_EL1),
+	ID_UNALLOCATED(6,2),
+	ID_UNALLOCATED(6,3),
+	ID_UNALLOCATED(6,4),
+	ID_UNALLOCATED(6,5),
+	ID_UNALLOCATED(6,6),
+	ID_UNALLOCATED(6,7),
+
+	/* CRm=7 */
+	ID_SANITISED(ID_AA64MMFR0_EL1),
+	ID_SANITISED(ID_AA64MMFR1_EL1),
+	ID_SANITISED(ID_AA64MMFR2_EL1),
+	ID_UNALLOCATED(7,3),
+	ID_UNALLOCATED(7,4),
+	ID_UNALLOCATED(7,5),
+	ID_UNALLOCATED(7,6),
+	ID_UNALLOCATED(7,7),
+
 	{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
 	{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
@@ -1790,8 +2009,8 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 	if (!r)
 		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
-	/* Not saved in the sys_reg array? */
-	if (r && !r->reg)
+	/* Not saved in the sys_reg array and not otherwise accessible? */
+	if (r && !(r->reg || r->get_user))
 		r = NULL;
 
 	return r;
@@ -1815,20 +2034,6 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 FUNCTION_INVARIANT(midr_el1)
 FUNCTION_INVARIANT(ctr_el0)
 FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(id_pfr0_el1)
-FUNCTION_INVARIANT(id_pfr1_el1)
-FUNCTION_INVARIANT(id_dfr0_el1)
-FUNCTION_INVARIANT(id_afr0_el1)
-FUNCTION_INVARIANT(id_mmfr0_el1)
-FUNCTION_INVARIANT(id_mmfr1_el1)
-FUNCTION_INVARIANT(id_mmfr2_el1)
-FUNCTION_INVARIANT(id_mmfr3_el1)
-FUNCTION_INVARIANT(id_isar0_el1)
-FUNCTION_INVARIANT(id_isar1_el1)
-FUNCTION_INVARIANT(id_isar2_el1)
-FUNCTION_INVARIANT(id_isar3_el1)
-FUNCTION_INVARIANT(id_isar4_el1)
-FUNCTION_INVARIANT(id_isar5_el1)
 FUNCTION_INVARIANT(clidr_el1)
 FUNCTION_INVARIANT(aidr_el1)
 
@@ -1836,20 +2041,6 @@ FUNCTION_INVARIANT(aidr_el1)
 static struct sys_reg_desc invariant_sys_regs[] = {
 	{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
 	{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
-	{ SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 },
-	{ SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 },
-	{ SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 },
-	{ SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 },
-	{ SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 },
-	{ SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 },
-	{ SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 },
-	{ SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 },
-	{ SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 },
-	{ SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 },
-	{ SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 },
-	{ SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 },
-	{ SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 },
-	{ SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 },
 	{ SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
 	{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
 	{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
@@ -2079,12 +2270,31 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
 	return true;
 }
 
+static int walk_one_sys_reg(const struct sys_reg_desc *rd,
+			    u64 __user **uind,
+			    unsigned int *total)
+{
+	/*
+	 * Only report a register that is saved (rd->reg is set) or that
+	 * supplies its own get_user accessor; skip everything else.
+	 */
+	if (!rd->reg && !rd->get_user)
+		return 0;
+
+	if (!copy_reg_to_user(rd, uind))
+		return -EFAULT;
+
+	*total += 1;
+	return 0;
+}
+
 /* Assumed ordered tables, see kvm_sys_reg_table_init. */
 static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
 {
 	const struct sys_reg_desc *i1, *i2, *end1, *end2;
 	unsigned int total = 0;
 	size_t num;
+	int err;
 
 	/* We check for duplicates here, to allow arch-specific overrides. */
 	i1 = get_target_table(vcpu->arch.target, true, &num);
@@ -2098,21 +2308,13 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
 	while (i1 || i2) {
 		int cmp = cmp_sys_reg(i1, i2);
 		/* target-specific overrides generic entry. */
-		if (cmp <= 0) {
-			/* Ignore registers we trap but don't save. */
-			if (i1->reg) {
-				if (!copy_reg_to_user(i1, &uind))
-					return -EFAULT;
-				total++;
-			}
-		} else {
-			/* Ignore registers we trap but don't save. */
-			if (i2->reg) {
-				if (!copy_reg_to_user(i2, &uind))
-					return -EFAULT;
-				total++;
-			}
-		}
+		if (cmp <= 0)
+			err = walk_one_sys_reg(i1, &uind, &total);
+		else
+			err = walk_one_sys_reg(i2, &uind, &total);
+
+		if (err)
+			return err;
 
 		if (cmp <= 0 && ++i1 == end1)
 			i1 = NULL;
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 9a8cb96555d6..4e696f96451f 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -3,7 +3,7 @@ lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
 		   copy_to_user.o copy_in_user.o copy_page.o		\
 		   clear_page.o memchr.o memcpy.o memmove.o memset.o	\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
-		   strchr.o strrchr.o
+		   strchr.o strrchr.o tishift.o
 
 # Tell the compiler to treat all general purpose registers (with the
 # exception of the IP registers, which are already handled by the caller
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index dad4ec9bbfd1..e48ac402e7be 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -24,10 +24,28 @@
 #include <linux/module.h>
 #include <linux/timex.h>
 
+#include <clocksource/arm_arch_timer.h>
+
+#define USECS_TO_CYCLES(time_usecs)			\
+	xloops_to_cycles((time_usecs) * 0x10C7UL)	/* 2^32 / 10^6, rounded up */
+
+static inline unsigned long xloops_to_cycles(unsigned long xloops)
+{
+	return (xloops * loops_per_jiffy * HZ) >> 32;
+}
+
 void __delay(unsigned long cycles)
 {
 	cycles_t start = get_cycles();
 
+	if (arch_timer_evtstrm_available()) {
+		const cycles_t timer_evt_period =
+			USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
+
+		while ((get_cycles() - start + timer_evt_period) < cycles)
+			wfe();	/* stops one timer_evt_period early */
+	}
+
 	while ((get_cycles() - start) < cycles)
 		cpu_relax();
 }
@@ -35,10 +53,7 @@ EXPORT_SYMBOL(__delay);
 
 inline void __const_udelay(unsigned long xloops)
 {
-	unsigned long loops;
-
-	loops = xloops * loops_per_jiffy * HZ;
-	__delay(loops >> 32);
+	__delay(xloops_to_cycles(xloops));
 }
 EXPORT_SYMBOL(__const_udelay);
 
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
new file mode 100644
index 000000000000..0179a43cc045
--- /dev/null
+++ b/arch/arm64/lib/tishift.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(__ashlti3)
+	cbz	x2, 1f			// shift of 0: return x1:x0 unchanged
+	mov	x3, #64
+	sub	x3, x3, x2		// x3 = 64 - shift
+	cmp	x3, #0
+	b.le	2f			// 64 - shift <= 0: take the path at 2:
+	lsl	x1, x1, x2		// high half <<= shift
+	lsr	x3, x0, x3		// bits of x0 that move up into x1
+	lsl	x2, x0, x2		// new low half
+	orr	x1, x1, x3		// merge carried bits into high half
+	mov	x0, x2
+1:
+	ret
+2:
+	neg	w1, w3			// w1 = shift - 64
+	mov	x2, #0
+	lsl	x1, x0, x1		// high half = x0 << (shift - 64)
+	mov	x0, x2			// low half = 0
+	ret
+ENDPROC(__ashlti3)
+
+ENTRY(__ashrti3)
+	cbz	x2, 3f			// shift of 0: return x1:x0 unchanged
+	mov	x3, #64
+	sub	x3, x3, x2		// x3 = 64 - shift
+	cmp	x3, #0
+	b.le	4f			// 64 - shift <= 0: take the path at 4:
+	lsr	x0, x0, x2		// low half >>= shift (logical)
+	lsl	x3, x1, x3		// bits of x1 that move down into x0
+	asr	x2, x1, x2		// new high half, sign bit replicated
+	orr	x0, x0, x3		// merge carried bits into low half
+	mov	x1, x2
+3:
+	ret
+4:
+	neg	w0, w3			// w0 = shift - 64
+	asr	x2, x1, #63		// new high half = sign fill of x1
+	asr	x0, x1, x0		// low half = x1 >> (shift - 64), arithmetic
+	mov	x1, x2
+	ret
+ENDPROC(__ashrti3)
+
+ENTRY(__lshrti3)
+	cbz	x2, 1f			// shift of 0: return x1:x0 unchanged
+	mov	x3, #64
+	sub	x3, x3, x2		// x3 = 64 - shift
+	cmp	x3, #0
+	b.le	2f			// 64 - shift <= 0: take the path at 2:
+	lsr	x0, x0, x2		// low half >>= shift
+	lsl	x3, x1, x3		// bits of x1 that move down into x0
+	lsr	x2, x1, x2		// new high half, zero filled
+	orr	x0, x0, x3		// merge carried bits into low half
+	mov	x1, x2
+1:
+	ret
+2:
+	neg	w0, w3			// w0 = shift - 64
+	mov	x2, #0
+	lsr	x0, x1, x0		// low half = x1 >> (shift - 64), logical
+	mov	x1, x2			// high half = 0
+	ret
+ENDPROC(__lshrti3)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 614af886b7ef..b45c5bcaeccb 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -166,7 +166,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
 	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
-						   prot, NULL);
+						   prot, __builtin_return_address(0));
 	if (!coherent_ptr)
 		goto no_map;
 
@@ -303,8 +303,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 			      unsigned long pfn, size_t size)
 {
 	int ret = -ENXIO;
-	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
-					PAGE_SHIFT;
+	unsigned long nr_vma_pages = vma_pages(vma);
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b64958b23a7f..22168cd0dde7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -105,13 +105,11 @@ static void data_abort_decode(unsigned int esr)
 		 (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
 }
 
-/*
- * Decode mem abort information
- */
 static void mem_abort_decode(unsigned int esr)
 {
 	pr_alert("Mem abort info:\n");
 
+	pr_alert("  ESR = 0x%08x\n", esr);
 	pr_alert("  Exception class = %s, IL = %u bits\n",
 		 esr_get_class_string(esr),
 		 (esr & ESR_ELx_IL) ? 32 : 16);
@@ -249,9 +247,6 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
 	return false;
 }
 
-/*
- * The kernel tried to access some page that wasn't present.
- */
 static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 			      struct pt_regs *regs)
 {
@@ -264,9 +259,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;
 
-	/*
-	 * No handler, we'll have to terminate things with extreme prejudice.
-	 */
 	bust_spinlocks(1);
 
 	if (is_permission_fault(esr, regs, addr)) {
@@ -291,10 +283,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	do_exit(SIGKILL);
 }
 
-/*
- * Something tried to access memory that isn't in our memory map. User mode
- * accesses just cause a SIGSEGV
- */
 static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
 			    unsigned int esr, unsigned int sig, int code,
 			    struct pt_regs *regs, int fault)
@@ -559,23 +547,6 @@ no_context:
 	return 0;
 }
 
-/*
- * First Level Translation Fault Handler
- *
- * We enter here because the first level page table doesn't contain a valid
- * entry for the address.
- *
- * If the address is in kernel space (>= TASK_SIZE), then we are probably
- * faulting in the vmalloc() area.
- *
- * If the init_task's first level page tables contains the relevant entry, we
- * copy the it to this task.  If not, we send the process a signal, fixup the
- * exception, or oops the kernel.
- *
- * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
- * or a critical region, and should only copy the information from the master
- * page table, nothing more.
- */
 static int __kprobes do_translation_fault(unsigned long addr,
 					  unsigned int esr,
 					  struct pt_regs *regs)
@@ -594,18 +565,11 @@ static int do_alignment_fault(unsigned long addr, unsigned int esr,
 	return 0;
 }
 
-/*
- * This abort handler always returns "fault".
- */
 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-	return 1;
+	return 1; /* "fault" */
 }
 
-/*
- * This abort handler deals with Synchronous External Abort.
- * It calls notifiers, and then returns "fault".
- */
 static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	struct siginfo info;
@@ -668,14 +632,14 @@ static const struct fault_info fault_info[] = {
 	{ do_sea,		SIGBUS,  0,		"level 1 (translation table walk)"	},
 	{ do_sea,		SIGBUS,  0,		"level 2 (translation table walk)"	},
 	{ do_sea,		SIGBUS,  0,		"level 3 (translation table walk)"	},
-	{ do_sea,		SIGBUS,  0,		"synchronous parity or ECC error" },
+	{ do_sea,		SIGBUS,  0,		"synchronous parity or ECC error" },	// Reserved when RAS is implemented
 	{ do_bad,		SIGBUS,  0,		"unknown 25"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 26"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 27"			},
-	{ do_sea,		SIGBUS,  0,		"level 0 synchronous parity error (translation table walk)"	},
-	{ do_sea,		SIGBUS,  0,		"level 1 synchronous parity error (translation table walk)"	},
-	{ do_sea,		SIGBUS,  0,		"level 2 synchronous parity error (translation table walk)"	},
-	{ do_sea,		SIGBUS,  0,		"level 3 synchronous parity error (translation table walk)"	},
+	{ do_sea,		SIGBUS,  0,		"level 0 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGBUS,  0,		"level 1 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGBUS,  0,		"level 2 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGBUS,  0,		"level 3 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
 	{ do_bad,		SIGBUS,  0,		"unknown 32"			},
 	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault"		},
 	{ do_bad,		SIGBUS,  0,		"unknown 34"			},
@@ -693,7 +657,7 @@ static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGBUS,  0,		"unknown 46"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 47"			},
 	{ do_bad,		SIGBUS,  0,		"TLB conflict abort"		},
-	{ do_bad,		SIGBUS,  0,		"unknown 49"			},
+	{ do_bad,		SIGBUS,  0,		"Unsupported atomic hardware update fault"	},
 	{ do_bad,		SIGBUS,  0,		"unknown 50"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 51"			},
 	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" },
@@ -710,13 +674,6 @@ static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
 };
 
-/*
- * Handle Synchronous External Aborts that occur in a guest kernel.
- *
- * The return value will be zero if the SEA was successfully handled
- * and non-zero if there was an error processing the error or there was
- * no error to process.
- */
 int handle_guest_sea(phys_addr_t addr, unsigned int esr)
 {
 	int ret = -ENOENT;
@@ -727,9 +684,6 @@ int handle_guest_sea(phys_addr_t addr, unsigned int esr)
 	return ret;
 }
 
-/*
- * Dispatch a data abort to the relevant handler.
- */
 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 					 struct pt_regs *regs)
 {
@@ -739,11 +693,14 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	if (!inf->fn(addr, esr, regs))
 		return;
 
-	pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
-		 inf->name, esr, addr);
+	pr_alert("Unhandled fault: %s at 0x%016lx\n",
+		 inf->name, addr);
 
 	mem_abort_decode(esr);
 
+	if (!user_mode(regs))
+		show_pte(addr);
+
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
@@ -751,9 +708,6 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	arm64_notify_die("", regs, &info, esr);
 }
 
-/*
- * Handle stack alignment exceptions.
- */
 asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 					   unsigned int esr,
 					   struct pt_regs *regs)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42fb0df6..95233dfc4c39 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -109,10 +109,10 @@ ENTRY(cpu_do_resume)
 	/*
 	 * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking
 	 * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug
-	 * exception. Mask them until local_dbg_restore() in cpu_suspend()
+	 * exception. Mask them until local_daif_restore() in cpu_suspend()
 	 * resets them.
 	 */
-	disable_dbg
+	disable_daif
 	msr	mdscr_el1, x10
 
 	msr	sctlr_el1, x12
@@ -155,8 +155,7 @@ ENDPROC(cpu_do_switch_mm)
  * called by anything else. It can only be executed from a TTBR0 mapping.
  */
 ENTRY(idmap_cpu_replace_ttbr1)
-	mrs	x2, daif
-	msr	daifset, #0xf
+	save_and_disable_daif flags=x2
 
 	adrp	x1, empty_zero_page
 	msr	ttbr1_el1, x1
@@ -169,7 +168,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
 	msr	ttbr1_el1, x0
 	isb
 
-	msr	daif, x2
+	restore_daif x2
 
 	ret
 ENDPROC(idmap_cpu_replace_ttbr1)
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index 597a737d538f..92f9edf9d11e 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -199,7 +199,7 @@ static int __init gtdt_parse_timer_block(struct acpi_gtdt_timer_block *block,
 	struct acpi_gtdt_timer_entry *gtdt_frame;
 
 	if (!block->timer_count) {
-		pr_err(FW_BUG "GT block present, but frame count is zero.");
+		pr_err(FW_BUG "GT block present, but frame count is zero.\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index de56394dd161..95255ecfae7c 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -88,8 +88,8 @@ static inline int iort_set_fwnode(struct acpi_iort_node *iort_node,
  *
  * Returns: fwnode_handle pointer on success, NULL on failure
  */
-static inline
-struct fwnode_handle *iort_get_fwnode(struct acpi_iort_node *node)
+static inline struct fwnode_handle *iort_get_fwnode(
+			struct acpi_iort_node *node)
 {
 	struct iort_fwnode *curr;
 	struct fwnode_handle *fwnode = NULL;
@@ -126,6 +126,31 @@ static inline void iort_delete_fwnode(struct acpi_iort_node *node)
 	spin_unlock(&iort_fwnode_lock);
 }
 
+/**
+ * iort_get_iort_node() - Look up the iort_node registered for an fwnode
+ *
+ * @fwnode: fwnode handle of the device whose iort_node is wanted
+ *
+ * Returns: iort_node pointer on success, NULL on failure
+ */
+static inline struct acpi_iort_node *iort_get_iort_node(
+			struct fwnode_handle *fwnode)
+{
+	struct acpi_iort_node *found = NULL;
+	struct iort_fwnode *entry;
+
+	spin_lock(&iort_fwnode_lock);
+	list_for_each_entry(entry, &iort_fwnode_list, list) {
+		if (entry->fwnode != fwnode)
+			continue;
+		found = entry->iort_node;
+		break;
+	}
+	spin_unlock(&iort_fwnode_lock);
+
+	return found;
+}
+
 typedef acpi_status (*iort_find_node_callback)
 	(struct acpi_iort_node *node, void *context);
 
@@ -306,9 +331,8 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
 	return 0;
 }
 
-static
-struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
-					u32 *id_out, int index)
+static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
+					       u32 *id_out, int index)
 {
 	struct acpi_iort_node *parent;
 	struct acpi_iort_id_mapping *map;
@@ -332,7 +356,8 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 
 	if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
 		if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT ||
-		    node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+		    node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX ||
+		    node->type == ACPI_IORT_NODE_SMMU_V3) {
 			*id_out = map->output_base;
 			return parent;
 		}
@@ -341,6 +366,47 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 	return NULL;
 }
 
+#if (ACPI_CA_VERSION > 0x20170929)
+static int iort_get_id_mapping_index(struct acpi_iort_node *node)
+{
+	struct acpi_iort_smmu_v3 *smmu;
+
+	switch (node->type) {
+	case ACPI_IORT_NODE_SMMU_V3:
+		/*
+		 * The SMMUv3 device ID mapping index was introduced with
+		 * revision 1 of the node; revision 0 does not provide it.
+		 */
+		if (node->revision < 1)
+			return -EINVAL;
+
+		smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+		/*
+		 * The ID mapping index is ignored only when all four
+		 * interrupts (event, PRI, GERR, sync) are GSIV based.
+		 */
+		if (smmu->event_gsiv && smmu->pri_gsiv && smmu->gerr_gsiv
+		    && smmu->sync_gsiv)
+			return -EINVAL;
+
+		if (smmu->id_mapping_index >= node->mapping_count) {
+			pr_err(FW_BUG "[node %p type %d] ID mapping index overflows valid mappings\n",
+			       node, node->type);
+			return -EINVAL;
+		}
+
+		return smmu->id_mapping_index;
+	default:
+		return -EINVAL;
+	}
+}
+#else
+static inline int iort_get_id_mapping_index(struct acpi_iort_node *node)
+{
+	return -EINVAL;
+}
+#endif
+
 static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
 					       u32 id_in, u32 *id_out,
 					       u8 type_mask)
@@ -350,7 +416,7 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
 	/* Parse the ID mapping tree to find specified node type */
 	while (node) {
 		struct acpi_iort_id_mapping *map;
-		int i;
+		int i, index;
 
 		if (IORT_TYPE_MASK(node->type) & type_mask) {
 			if (id_out)
@@ -371,8 +437,19 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
 			goto fail_map;
 		}
 
+		/*
+		 * Get the special ID mapping index (if any) and skip its
+		 * associated ID map to prevent erroneous multi-stage
+		 * IORT ID translations.
+		 */
+		index = iort_get_id_mapping_index(node);
+
 		/* Do the ID translation */
 		for (i = 0; i < node->mapping_count; i++, map++) {
+			/* Skip the special ID mapping index, if any */
+			if (i == index)
+				continue;
+
 			if (!iort_id_map(map, node->type, id, &id))
 				break;
 		}
@@ -392,10 +469,9 @@ fail_map:
 	return NULL;
 }
 
-static
-struct acpi_iort_node *iort_node_map_platform_id(struct acpi_iort_node *node,
-						 u32 *id_out, u8 type_mask,
-						 int index)
+static struct acpi_iort_node *iort_node_map_platform_id(
+		struct acpi_iort_node *node, u32 *id_out, u8 type_mask,
+		int index)
 {
 	struct acpi_iort_node *parent;
 	u32 id;
@@ -424,9 +500,25 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
 {
 	struct pci_bus *pbus;
 
-	if (!dev_is_pci(dev))
+	if (!dev_is_pci(dev)) {
+		struct acpi_iort_node *node;
+		/*
+		 * Scan iort_fwnode_list to see if this is an IORT platform
+		 * device (such as SMMU, PMCG); if so, its IORT node was
+		 * already cached and associated with the fwnode when the
+		 * IORT platform devices were initialized.
+		 */
+		node = iort_get_iort_node(dev->fwnode);
+		if (node)
+			return node;
+
+		/*
+		 * If not, then it should be a platform device defined in
+		 * DSDT/SSDT (with a Named Component node in IORT).
+		 */
 		return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
 				      iort_match_node_callback, dev);
+	}
 
 	/* Find a PCI root bus */
 	pbus = to_pci_dev(dev)->bus;
@@ -466,16 +558,24 @@ u32 iort_msi_map_rid(struct device *dev, u32 req_id)
  */
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
 {
-	int i;
+	int i, index;
 	struct acpi_iort_node *node;
 
 	node = iort_find_dev_node(dev);
 	if (!node)
 		return -ENODEV;
 
-	for (i = 0; i < node->mapping_count; i++) {
-		if (iort_node_map_platform_id(node, dev_id, IORT_MSI_TYPE, i))
+	index = iort_get_id_mapping_index(node);
+	/* if there is a valid index, go get the dev_id directly */
+	if (index >= 0) {
+		if (iort_node_get_id(node, dev_id, index))
 			return 0;
+	} else {
+		for (i = 0; i < node->mapping_count; i++) {
+			if (iort_node_map_platform_id(node, dev_id,
+						      IORT_MSI_TYPE, i))
+				return 0;
+		}
 	}
 
 	return -ENODEV;
@@ -538,6 +638,49 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
 	return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
 }
 
+static void iort_set_device_domain(struct device *dev,
+				   struct acpi_iort_node *node)
+{
+	struct acpi_iort_its_group *its;
+	struct acpi_iort_node *msi_parent;
+	struct acpi_iort_id_mapping *map;
+	struct fwnode_handle *iort_fwnode;
+	struct irq_domain *domain;
+	int index;
+
+	index = iort_get_id_mapping_index(node);
+	if (index < 0)
+		return;
+
+	map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+			   node->mapping_offset + index * sizeof(*map));
+
+	/* Firmware bug: must be a single mapping with an output reference */
+	if (!map->output_reference ||
+	    !(map->flags & ACPI_IORT_ID_SINGLE_MAPPING)) {
+		pr_err(FW_BUG "[node %p type %d] Invalid MSI mapping\n",
+		       node, node->type);
+		return;
+	}
+
+	msi_parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+				  map->output_reference);
+
+	if (!msi_parent || msi_parent->type != ACPI_IORT_NODE_ITS_GROUP)
+		return;
+
+	/* Move to ITS specific data; only identifiers[0] is looked up */
+	its = (struct acpi_iort_its_group *)msi_parent->node_data;
+
+	iort_fwnode = iort_find_domain_token(its->identifiers[0]);
+	if (!iort_fwnode)
+		return;
+
+	domain = irq_find_matching_fwnode(iort_fwnode, DOMAIN_BUS_PLATFORM_MSI);
+	if (domain)
+		dev_set_msi_domain(dev, domain);
+}
+
 /**
  * iort_get_platform_device_domain() - Find MSI domain related to a
  * platform device
@@ -623,14 +766,14 @@ static inline bool iort_iommu_driver_enabled(u8 type)
 }
 
 #ifdef CONFIG_IOMMU_API
-static inline
-const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(
+				struct iommu_fwspec *fwspec)
 {
 	return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
 }
 
-static inline
-int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+static inline int iort_add_device_replay(const struct iommu_ops *ops,
+					 struct device *dev)
 {
 	int err = 0;
 
@@ -640,11 +783,11 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
 	return err;
 }
 #else
-static inline
-const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec)
+static inline const struct iommu_ops *iort_fwspec_iommu_ops(
+				struct iommu_fwspec *fwspec)
 { return NULL; }
-static inline
-int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev)
+static inline int iort_add_device_replay(const struct iommu_ops *ops,
+					 struct device *dev)
 { return 0; }
 #endif
 
@@ -968,7 +1111,7 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
 	return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
 }
 
-#if defined(CONFIG_ACPI_NUMA) && defined(ACPI_IORT_SMMU_V3_PXM_VALID)
+#if defined(CONFIG_ACPI_NUMA)
 /*
  * set numa proximity domain for smmuv3 device
  */
@@ -1051,34 +1194,34 @@ static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node)
 	return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK;
 }
 
-struct iort_iommu_config {
+struct iort_dev_config {
 	const char *name;
-	int (*iommu_init)(struct acpi_iort_node *node);
-	bool (*iommu_is_coherent)(struct acpi_iort_node *node);
-	int (*iommu_count_resources)(struct acpi_iort_node *node);
-	void (*iommu_init_resources)(struct resource *res,
+	int (*dev_init)(struct acpi_iort_node *node);
+	bool (*dev_is_coherent)(struct acpi_iort_node *node);
+	int (*dev_count_resources)(struct acpi_iort_node *node);
+	void (*dev_init_resources)(struct resource *res,
 				     struct acpi_iort_node *node);
-	void (*iommu_set_proximity)(struct device *dev,
+	void (*dev_set_proximity)(struct device *dev,
 				    struct acpi_iort_node *node);
 };
 
-static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
+static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = {
 	.name = "arm-smmu-v3",
-	.iommu_is_coherent = arm_smmu_v3_is_coherent,
-	.iommu_count_resources = arm_smmu_v3_count_resources,
-	.iommu_init_resources = arm_smmu_v3_init_resources,
-	.iommu_set_proximity = arm_smmu_v3_set_proximity,
+	.dev_is_coherent = arm_smmu_v3_is_coherent,
+	.dev_count_resources = arm_smmu_v3_count_resources,
+	.dev_init_resources = arm_smmu_v3_init_resources,
+	.dev_set_proximity = arm_smmu_v3_set_proximity,
 };
 
-static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
+static const struct iort_dev_config iort_arm_smmu_cfg __initconst = {
 	.name = "arm-smmu",
-	.iommu_is_coherent = arm_smmu_is_coherent,
-	.iommu_count_resources = arm_smmu_count_resources,
-	.iommu_init_resources = arm_smmu_init_resources
+	.dev_is_coherent = arm_smmu_is_coherent,
+	.dev_count_resources = arm_smmu_count_resources,
+	.dev_init_resources = arm_smmu_init_resources
 };
 
-static __init
-const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
+static __init const struct iort_dev_config *iort_get_dev_cfg(
+			struct acpi_iort_node *node)
 {
 	switch (node->type) {
 	case ACPI_IORT_NODE_SMMU_V3:
@@ -1091,31 +1234,28 @@ const struct iort_iommu_config *iort_get_iommu_cfg(struct acpi_iort_node *node)
 }
 
 /**
- * iort_add_smmu_platform_device() - Allocate a platform device for SMMU
- * @node: Pointer to SMMU ACPI IORT node
+ * iort_add_platform_device() - Allocate a platform device for IORT node
+ * @node: Pointer to device ACPI IORT node
  *
  * Returns: 0 on success, <0 failure
  */
-static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
+static int __init iort_add_platform_device(struct acpi_iort_node *node,
+					   const struct iort_dev_config *ops)
 {
 	struct fwnode_handle *fwnode;
 	struct platform_device *pdev;
 	struct resource *r;
 	enum dev_dma_attr attr;
 	int ret, count;
-	const struct iort_iommu_config *ops = iort_get_iommu_cfg(node);
-
-	if (!ops)
-		return -ENODEV;
 
 	pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
 	if (!pdev)
 		return -ENOMEM;
 
-	if (ops->iommu_set_proximity)
-		ops->iommu_set_proximity(&pdev->dev, node);
+	if (ops->dev_set_proximity)
+		ops->dev_set_proximity(&pdev->dev, node);
 
-	count = ops->iommu_count_resources(node);
+	count = ops->dev_count_resources(node);
 
 	r = kcalloc(count, sizeof(*r), GFP_KERNEL);
 	if (!r) {
@@ -1123,7 +1263,7 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
 		goto dev_put;
 	}
 
-	ops->iommu_init_resources(r, node);
+	ops->dev_init_resources(r, node);
 
 	ret = platform_device_add_resources(pdev, r, count);
 	/*
@@ -1158,12 +1298,14 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node)
 
 	pdev->dev.fwnode = fwnode;
 
-	attr = ops->iommu_is_coherent(node) ?
-			     DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
+	attr = ops->dev_is_coherent && ops->dev_is_coherent(node) ?
+			DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
 
 	/* Configure DMA for the page table walker */
 	acpi_dma_configure(&pdev->dev, attr);
 
+	iort_set_device_domain(&pdev->dev, node);
+
 	ret = platform_device_add(pdev);
 	if (ret)
 		goto dma_deconfigure;
@@ -1216,6 +1358,7 @@ static void __init iort_init_platform_devices(void)
 	struct fwnode_handle *fwnode;
 	int i, ret;
 	bool acs_enabled = false;
+	const struct iort_dev_config *ops;
 
 	/*
 	 * iort_table and iort both point to the start of IORT table, but
@@ -1238,16 +1381,15 @@ static void __init iort_init_platform_devices(void)
 		if (!acs_enabled)
 			acs_enabled = iort_enable_acs(iort_node);
 
-		if ((iort_node->type == ACPI_IORT_NODE_SMMU) ||
-			(iort_node->type == ACPI_IORT_NODE_SMMU_V3)) {
-
+		ops = iort_get_dev_cfg(iort_node);
+		if (ops) {
 			fwnode = acpi_alloc_fwnode_static();
 			if (!fwnode)
 				return;
 
 			iort_set_fwnode(iort_node, fwnode);
 
-			ret = iort_add_smmu_platform_device(iort_node);
+			ret = iort_add_platform_device(iort_node, ops);
 			if (ret) {
 				iort_delete_fwnode(iort_node);
 				acpi_free_fwnode_static(fwnode);
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index e8c6946fed9d..3063f5312397 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -1276,6 +1276,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 
 	/* Perf driver registration */
 	ccn->dt.pmu = (struct pmu) {
+		.module = THIS_MODULE,
 		.attr_groups = arm_ccn_pmu_attr_groups,
 		.task_ctx_nr = perf_invalid_context,
 		.event_init = arm_ccn_pmu_event_init,
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 0ecf5beb56ec..538bfa8ba9b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -77,6 +77,7 @@ static bool arch_timer_mem_use_virtual;
 static bool arch_counter_suspend_stop;
 static bool vdso_default = true;
 
+static cpumask_t evtstrm_available = CPU_MASK_NONE;
 static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
 
 static int __init early_evtstrm_cfg(char *buf)
@@ -739,6 +740,7 @@ static void arch_timer_evtstrm_enable(int divider)
 #ifdef CONFIG_COMPAT
 	compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
 #endif
+	cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
 }
 
 static void arch_timer_configure_evtstream(void)
@@ -863,6 +865,16 @@ u32 arch_timer_get_rate(void)
 	return arch_timer_rate;
 }
 
+bool arch_timer_evtstrm_available(void)
+{
+	/*
+	 * We might get called from a preemptible context, hence the raw_
+	 * CPU id accessor. This is fine because availability of the event
+	 * stream should be the same wherever the task ends up running.
+	 */
+	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
+}
+
 static u64 arch_counter_get_cntvct_mem(void)
 {
 	u32 vct_lo, vct_hi, tmp_hi;
@@ -928,6 +940,8 @@ static int arch_timer_dying_cpu(unsigned int cpu)
 {
 	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
 
+	cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+
 	arch_timer_stop(clk);
 	return 0;
 }
@@ -937,10 +951,16 @@ static DEFINE_PER_CPU(unsigned long, saved_cntkctl);
 static int arch_timer_cpu_pm_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
-	if (action == CPU_PM_ENTER)
+	if (action == CPU_PM_ENTER) {
 		__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
-	else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
+
+		cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
 		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
+
+		if (elf_hwcap & HWCAP_EVTSTRM)
+			cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
+	}
 	return NOTIFY_OK;
 }
 
@@ -1016,7 +1036,6 @@ static int __init arch_timer_register(void)
 	if (err)
 		goto out_unreg_notify;
 
-
 	/* Register and immediately configure the timer on the boot CPU */
 	err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
 				"clockevents/arm/arch_timer:starting",
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e5197ffb7422..b8f44b068fc6 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,13 @@ config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
 
+config HISI_PMU
+	bool "HiSilicon SoC PMU"
+	depends on ARM64 && ACPI
+	help
+	  Support for HiSilicon SoC uncore performance monitoring
+	  unit (PMU), such as: L3C, HHA and DDRC.
+
 config QCOM_L2_PMU
 	bool "Qualcomm Technologies L2-cache PMU"
 	depends on ARCH_QCOM && ARM64 && ACPI
@@ -43,4 +50,12 @@ config XGENE_PMU
         help
           Say y if you want to use APM X-Gene SoC performance monitors.
 
+config ARM_SPE_PMU
+	tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
+	depends on PERF_EVENTS && ARM64
+	help
+	  Enable perf support for the ARMv8.2 Statistical Profiling
+	  Extension, which provides periodic sampling of operations in
+	  the CPU pipeline and reports this via the perf AUX interface.
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 9402dc8ff22c..710a0135bd61 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d14fc2e67f93..7bc5eee96b31 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -539,7 +539,7 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
 	if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
 		return;
 
-	if (irq_is_percpu(irq)) {
+	if (irq_is_percpu_devid(irq)) {
 		free_percpu_irq(irq, &hw_events->percpu_pmu);
 		cpumask_clear(&armpmu->active_irqs);
 		return;
@@ -565,10 +565,10 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
 	if (!irq)
 		return 0;
 
-	if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
+	if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) {
 		err = request_percpu_irq(irq, handler, "arm-pmu",
 					 &hw_events->percpu_pmu);
-	} else if (irq_is_percpu(irq)) {
+	} else if (irq_is_percpu_devid(irq)) {
 		int other_cpu = cpumask_first(&armpmu->active_irqs);
 		int other_irq = per_cpu(hw_events->irq, other_cpu);
 
@@ -649,7 +649,7 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
 
 	irq = armpmu_get_cpu_irq(pmu, cpu);
 	if (irq) {
-		if (irq_is_percpu(irq)) {
+		if (irq_is_percpu_devid(irq)) {
 			enable_percpu_irq(irq, IRQ_TYPE_NONE);
 			return 0;
 		}
@@ -667,7 +667,7 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
 		return 0;
 
 	irq = armpmu_get_cpu_irq(pmu, cpu);
-	if (irq && irq_is_percpu(irq))
+	if (irq && irq_is_percpu_devid(irq))
 		disable_percpu_irq(irq);
 
 	return 0;
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 3303dd8d8eb5..705f1a390e31 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -193,9 +193,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 	int pmu_idx = 0;
 	int cpu, ret;
 
-	if (acpi_disabled)
-		return 0;
-
 	/*
 	 * Initialise and register the set of PMUs which we know about right
 	 * now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 4428852e1da1..91b224eced18 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -127,7 +127,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 
 	if (num_irqs == 1) {
 		int irq = platform_get_irq(pdev, 0);
-		if (irq && irq_is_percpu(irq))
+		if (irq && irq_is_percpu_devid(irq))
 			return pmu_parse_percpu_irq(pmu, irq);
 	}
 
@@ -150,7 +150,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 		if (WARN_ON(irq <= 0))
 			continue;
 
-		if (irq_is_percpu(irq)) {
+		if (irq_is_percpu_devid(irq)) {
 			pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
 			return -EINVAL;
 		}
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
new file mode 100644
index 000000000000..8ce262fc2561
--- /dev/null
+++ b/drivers/perf/arm_spe_pmu.c
@@ -0,0 +1,1249 @@
+/*
+ * Perf support for the Statistical Profiling Extension, introduced as
+ * part of ARMv8.2.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#define PMUNAME					"arm_spe"
+#define DRVNAME					PMUNAME "_pmu"
+#define pr_fmt(fmt)				DRVNAME ": " fmt
+
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/sysreg.h>
+
+#define ARM_SPE_BUF_PAD_BYTE			0
+
+struct arm_spe_pmu_buf {
+	int					nr_pages; /* buffer pages */
+	bool					snapshot; /* snapshot mode */
+	void					*base; /* vmap() address */
+};
+
+struct arm_spe_pmu {
+	struct pmu				pmu;
+	struct platform_device			*pdev;
+	cpumask_t				supported_cpus;
+	struct hlist_node			hotplug_node;
+
+	int					irq; /* PPI */
+
+	u16					min_period;
+	u16					counter_sz;
+
+#define SPE_PMU_FEAT_FILT_EVT			(1UL << 0)
+#define SPE_PMU_FEAT_FILT_TYP			(1UL << 1)
+#define SPE_PMU_FEAT_FILT_LAT			(1UL << 2)
+#define SPE_PMU_FEAT_ARCH_INST			(1UL << 3)
+#define SPE_PMU_FEAT_LDS			(1UL << 4)
+#define SPE_PMU_FEAT_ERND			(1UL << 5)
+#define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
+	u64					features;
+
+	u16					max_record_sz;
+	u16					align;
+	struct perf_output_handle __percpu	*handle;
+};
+
+#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
+
+/* Convert a free-running index from perf into an SPE buffer offset */
+#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/* Keep track of our dynamic hotplug state */
+static enum cpuhp_state arm_spe_pmu_online;
+
+enum arm_spe_pmu_buf_fault_action {
+	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
+	SPE_PMU_BUF_FAULT_ACT_FATAL,
+	SPE_PMU_BUF_FAULT_ACT_OK,
+};
+
+/* This sysfs gunk was really good fun to write. */
+enum arm_spe_pmu_capabilities {
+	SPE_PMU_CAP_ARCH_INST = 0,
+	SPE_PMU_CAP_ERND,
+	SPE_PMU_CAP_FEAT_MAX,
+	SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
+	SPE_PMU_CAP_MIN_IVAL,
+};
+
+static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
+	[SPE_PMU_CAP_ARCH_INST]	= SPE_PMU_FEAT_ARCH_INST,
+	[SPE_PMU_CAP_ERND]	= SPE_PMU_FEAT_ERND,
+};
+
+/* Look up one "caps" sysfs value: a 0/1 feature flag or a numeric limit */
+static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+{
+	/* Indices below SPE_PMU_CAP_FEAT_MAX select a feature bit */
+	if (cap < SPE_PMU_CAP_FEAT_MAX)
+		return (spe_pmu->features & arm_spe_pmu_feat_caps[cap]) != 0;
+
+	if (cap == SPE_PMU_CAP_CNT_SZ)
+		return spe_pmu->counter_sz;
+
+	if (cap == SPE_PMU_CAP_MIN_IVAL)
+		return spe_pmu->min_period;
+
+	WARN(1, "unknown cap %d\n", cap);
+	return 0;
+}
+
+static ssize_t arm_spe_pmu_cap_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	int cap = (long)ea->var;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+		arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+#define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
+	&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_var }	\
+	})[0].attr.attr
+
+#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var)				\
+	SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+
+static struct attribute *arm_spe_pmu_cap_attr[] = {
+	SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
+	SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
+	SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
+	SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+	NULL,
+};
+
+static struct attribute_group arm_spe_pmu_cap_group = {
+	.name	= "caps",
+	.attrs	= arm_spe_pmu_cap_attr,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_ts_enable_CFG		config	/* PMSCR_EL1.TS */
+#define ATTR_CFG_FLD_ts_enable_LO		0
+#define ATTR_CFG_FLD_ts_enable_HI		0
+#define ATTR_CFG_FLD_pa_enable_CFG		config	/* PMSCR_EL1.PA */
+#define ATTR_CFG_FLD_pa_enable_LO		1
+#define ATTR_CFG_FLD_pa_enable_HI		1
+#define ATTR_CFG_FLD_pct_enable_CFG		config	/* PMSCR_EL1.PCT */
+#define ATTR_CFG_FLD_pct_enable_LO		2
+#define ATTR_CFG_FLD_pct_enable_HI		2
+#define ATTR_CFG_FLD_jitter_CFG			config	/* PMSIRR_EL1.RND */
+#define ATTR_CFG_FLD_jitter_LO			16
+#define ATTR_CFG_FLD_jitter_HI			16
+#define ATTR_CFG_FLD_branch_filter_CFG		config	/* PMSFCR_EL1.B */
+#define ATTR_CFG_FLD_branch_filter_LO		32
+#define ATTR_CFG_FLD_branch_filter_HI		32
+#define ATTR_CFG_FLD_load_filter_CFG		config	/* PMSFCR_EL1.LD */
+#define ATTR_CFG_FLD_load_filter_LO		33
+#define ATTR_CFG_FLD_load_filter_HI		33
+#define ATTR_CFG_FLD_store_filter_CFG		config	/* PMSFCR_EL1.ST */
+#define ATTR_CFG_FLD_store_filter_LO		34
+#define ATTR_CFG_FLD_store_filter_HI		34
+
+#define ATTR_CFG_FLD_event_filter_CFG		config1	/* PMSEVFR_EL1 */
+#define ATTR_CFG_FLD_event_filter_LO		0
+#define ATTR_CFG_FLD_event_filter_HI		63
+
+#define ATTR_CFG_FLD_min_latency_CFG		config2	/* PMSLATFR_EL1.MINLAT */
+#define ATTR_CFG_FLD_min_latency_LO		0
+#define ATTR_CFG_FLD_min_latency_HI		11
+
+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name)					\
+	PMU_FORMAT_ATTR(name,						\
+	_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,			\
+			     ATTR_CFG_FLD_##name##_LO,			\
+			     ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)				\
+	((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name)					\
+	_ATTR_CFG_GET_FLD(attr,						\
+			  ATTR_CFG_FLD_##name##_CFG,			\
+			  ATTR_CFG_FLD_##name##_LO,			\
+			  ATTR_CFG_FLD_##name##_HI)
+
+GEN_PMU_FORMAT_ATTR(ts_enable);
+GEN_PMU_FORMAT_ATTR(pa_enable);
+GEN_PMU_FORMAT_ATTR(pct_enable);
+GEN_PMU_FORMAT_ATTR(jitter);
+GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(event_filter);
+GEN_PMU_FORMAT_ATTR(min_latency);
+
+static struct attribute *arm_spe_pmu_formats_attr[] = {
+	&format_attr_ts_enable.attr,
+	&format_attr_pa_enable.attr,
+	&format_attr_pct_enable.attr,
+	&format_attr_jitter.attr,
+	&format_attr_branch_filter.attr,
+	&format_attr_load_filter.attr,
+	&format_attr_store_filter.attr,
+	&format_attr_event_filter.attr,
+	&format_attr_min_latency.attr,
+	NULL,
+};
+
+static struct attribute_group arm_spe_pmu_format_group = {
+	.name	= "format",
+	.attrs	= arm_spe_pmu_formats_attr,
+};
+
+static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+
+	return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *arm_spe_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group arm_spe_pmu_group = {
+	.attrs	= arm_spe_pmu_attrs,
+};
+
+static const struct attribute_group *arm_spe_pmu_attr_groups[] = {
+	&arm_spe_pmu_group,
+	&arm_spe_pmu_cap_group,
+	&arm_spe_pmu_format_group,
+	NULL,
+};
+
+/* Convert between user ABI and register values */
+static u64 arm_spe_event_to_pmscr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	reg |= ATTR_CFG_GET_FLD(attr, ts_enable) << SYS_PMSCR_EL1_TS_SHIFT;
+	reg |= ATTR_CFG_GET_FLD(attr, pa_enable) << SYS_PMSCR_EL1_PA_SHIFT;
+	reg |= ATTR_CFG_GET_FLD(attr, pct_enable) << SYS_PMSCR_EL1_PCT_SHIFT;
+
+	if (!attr->exclude_user)
+		reg |= BIT(SYS_PMSCR_EL1_E0SPE_SHIFT);
+
+	if (!attr->exclude_kernel)
+		reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
+
+	if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && capable(CAP_SYS_ADMIN))
+		reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
+
+	return reg;
+}
+
+/* Force the sample period into what PMSIRR_EL1.INTERVAL can express */
+static void arm_spe_event_sanitise_period(struct perf_event *event)
+{
+	const u64 lo = to_spe_pmu(event->pmu)->min_period;
+	const u64 hi = SYS_PMSIRR_EL1_INTERVAL_MASK
+		       << SYS_PMSIRR_EL1_INTERVAL_SHIFT;
+	u64 val = event->hw.sample_period;
+
+	/* Raise to the minimum, saturate at the maximum, otherwise mask */
+	if (val < lo)
+		val = lo;
+	else
+		val = val > hi ? hi : val & hi;
+
+	event->hw.sample_period = val;
+}
+
+static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	arm_spe_event_sanitise_period(event);
+
+	reg |= ATTR_CFG_GET_FLD(attr, jitter) << SYS_PMSIRR_EL1_RND_SHIFT;
+	reg |= event->hw.sample_period;
+
+	return reg;
+}
+
+static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	reg |= ATTR_CFG_GET_FLD(attr, load_filter) << SYS_PMSFCR_EL1_LD_SHIFT;
+	reg |= ATTR_CFG_GET_FLD(attr, store_filter) << SYS_PMSFCR_EL1_ST_SHIFT;
+	reg |= ATTR_CFG_GET_FLD(attr, branch_filter) << SYS_PMSFCR_EL1_B_SHIFT;
+
+	if (reg)
+		reg |= BIT(SYS_PMSFCR_EL1_FT_SHIFT);
+
+	if (ATTR_CFG_GET_FLD(attr, event_filter))
+		reg |= BIT(SYS_PMSFCR_EL1_FE_SHIFT);
+
+	if (ATTR_CFG_GET_FLD(attr, min_latency))
+		reg |= BIT(SYS_PMSFCR_EL1_FL_SHIFT);
+
+	return reg;
+}
+
+static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return ATTR_CFG_GET_FLD(attr, event_filter);
+}
+
+static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return ATTR_CFG_GET_FLD(attr, min_latency)
+	       << SYS_PMSLATFR_EL1_MINLAT_SHIFT;
+}
+
+static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	memset(buf->base + head, ARM_SPE_BUF_PAD_BYTE, len);
+	if (!buf->snapshot)
+		perf_aux_output_skip(handle, len);
+}
+
+static u64 arm_spe_pmu_next_snapshot_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+	u64 limit = buf->nr_pages * PAGE_SIZE;
+
+	/*
+	 * The trace format isn't parseable in reverse, so clamp
+	 * the limit to half of the buffer size in snapshot mode
+	 * so that the worst case is half a buffer of records, as
+	 * opposed to a single record.
+	 */
+	if (head < limit >> 1)
+		limit >>= 1;
+
+	/*
+	 * If we're within max_record_sz of the limit, we must
+	 * pad, move the head index and recompute the limit.
+	 */
+	if (limit - head < spe_pmu->max_record_sz) {
+		arm_spe_pmu_pad_buf(handle, limit - head);
+		handle->head = PERF_IDX2OFF(limit, buf);
+		limit = ((buf->nr_pages * PAGE_SIZE) >> 1) + handle->head;
+	}
+
+	return limit;
+}
+
+static u64 __arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
+	u64 limit = bufsize;
+	u64 head, tail, wakeup;
+
+	/*
+	 * The head can be misaligned for two reasons:
+	 *
+	 * 1. The hardware left PMBPTR pointing to the first byte after
+	 *    a record when generating a buffer management event.
+	 *
+	 * 2. We used perf_aux_output_skip to consume handle->size bytes
+	 *    and CIRC_SPACE was used to compute the size, which always
+	 *    leaves one entry free.
+	 *
+	 * Deal with this by padding to the next alignment boundary and
+	 * moving the head index. If we run out of buffer space, we'll
+	 * reduce handle->size to zero and end up reporting truncation.
+	 */
+	head = PERF_IDX2OFF(handle->head, buf);
+	if (!IS_ALIGNED(head, spe_pmu->align)) {
+		unsigned long delta = roundup(head, spe_pmu->align) - head;
+
+		delta = min(delta, handle->size);
+		arm_spe_pmu_pad_buf(handle, delta);
+		head = PERF_IDX2OFF(handle->head, buf);
+	}
+
+	/* If we've run out of free space, then nothing more to do */
+	if (!handle->size)
+		goto no_space;
+
+	/* Compute the tail and wakeup indices now that we've aligned head */
+	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
+	wakeup = PERF_IDX2OFF(handle->wakeup, buf);
+
+	/*
+	 * Avoid clobbering unconsumed data. We know we have space, so
+	 * if we see head == tail we know that the buffer is empty. If
+	 * head > tail, then there's nothing to clobber prior to
+	 * wrapping.
+	 */
+	if (head < tail)
+		limit = round_down(tail, PAGE_SIZE);
+
+	/*
+	 * Wakeup may be arbitrarily far into the future. If it's not in
+	 * the current generation, either we'll wrap before hitting it,
+	 * or it's in the past and has been handled already.
+	 *
+	 * If there's a wakeup before we wrap, arrange to be woken up by
+	 * the page boundary following it. Keep the tail boundary if
+	 * that's lower.
+	 */
+	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
+		limit = min(limit, round_up(wakeup, PAGE_SIZE));
+
+	if (limit > head)
+		return limit;
+
+	arm_spe_pmu_pad_buf(handle, handle->size);
+no_space:
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
+	return 0;
+}
+
+static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	u64 limit = __arm_spe_pmu_next_off(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	/*
+	 * If the head has come too close to the end of the buffer,
+	 * then pad to the end and recompute the limit.
+	 */
+	if (limit && (limit - head < spe_pmu->max_record_sz)) {
+		arm_spe_pmu_pad_buf(handle, limit - head);
+		limit = __arm_spe_pmu_next_off(handle);
+	}
+
+	return limit;
+}
+
+static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
+					  struct perf_event *event)
+{
+	u64 base, limit;
+	struct arm_spe_pmu_buf *buf;
+
+	/* Start a new aux session */
+	buf = perf_aux_output_begin(handle, event);
+	if (!buf) {
+		event->hw.state |= PERF_HES_STOPPED;
+		/*
+		 * We still need to clear the limit pointer, since the
+		 * profiler might only be disabled by virtue of a fault.
+		 */
+		limit = 0;
+		goto out_write_limit;
+	}
+
+	limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
+			      : arm_spe_pmu_next_off(handle);
+	if (limit)
+		limit |= BIT(SYS_PMBLIMITR_EL1_E_SHIFT);
+
+	limit += (u64)buf->base;
+	base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
+	write_sysreg_s(base, SYS_PMBPTR_EL1);
+
+out_write_limit:
+	write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
+}
+
+static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	u64 offset, size;
+
+	offset = read_sysreg_s(SYS_PMBPTR_EL1) - (u64)buf->base;
+	size = offset - PERF_IDX2OFF(handle->head, buf);
+
+	if (buf->snapshot)
+		handle->head = offset;
+
+	perf_aux_output_end(handle, size);
+}
+
+static void arm_spe_pmu_disable_and_drain_local(void)
+{
+	/* Disable profiling at EL0 and EL1 */
+	write_sysreg_s(0, SYS_PMSCR_EL1);
+	isb();
+
+	/* Drain any buffered data */
+	psb_csync();
+	dsb(nsh);
+
+	/* Disable the profiling buffer */
+	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
+	isb();
+}
+
+/* IRQ handling */
+static enum arm_spe_pmu_buf_fault_action
+arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
+{
+	const char *err_str;
+	u64 pmbsr;
+	enum arm_spe_pmu_buf_fault_action ret;
+
+	/*
+	 * Ensure new profiling data is visible to the CPU and any external
+	 * aborts have been resolved.
+	 */
+	psb_csync();
+	dsb(nsh);
+
+	/* Ensure hardware updates to PMBPTR_EL1 are visible */
+	isb();
+
+	/* Service required? */
+	pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
+	if (!(pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)))
+		return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;
+
+	/*
+	 * If we've lost data, disable profiling and also set the PARTIAL
+	 * flag to indicate that the last record is corrupted.
+	 */
+	if (pmbsr & BIT(SYS_PMBSR_EL1_DL_SHIFT))
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
+					     PERF_AUX_FLAG_PARTIAL);
+
+	/* Report collisions to userspace so that it can up the period */
+	if (pmbsr & BIT(SYS_PMBSR_EL1_COLL_SHIFT))
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
+
+	/* We only expect buffer management events */
+	switch (pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT)) {
+	case SYS_PMBSR_EL1_EC_BUF:
+		/* Handled below */
+		break;
+	case SYS_PMBSR_EL1_EC_FAULT_S1:
+	case SYS_PMBSR_EL1_EC_FAULT_S2:
+		err_str = "Unexpected buffer fault";
+		goto out_err;
+	default:
+		err_str = "Unknown error code";
+		goto out_err;
+	}
+
+	/* Buffer management event */
+	switch (pmbsr &
+		(SYS_PMBSR_EL1_BUF_BSC_MASK << SYS_PMBSR_EL1_BUF_BSC_SHIFT)) {
+	case SYS_PMBSR_EL1_BUF_BSC_FULL:
+		ret = SPE_PMU_BUF_FAULT_ACT_OK;
+		goto out_stop;
+	default:
+		err_str = "Unknown buffer status code";
+	}
+
+out_err:
+	pr_err_ratelimited("%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n",
+			   err_str, smp_processor_id(), pmbsr,
+			   read_sysreg_s(SYS_PMBPTR_EL1),
+			   read_sysreg_s(SYS_PMBLIMITR_EL1));
+	ret = SPE_PMU_BUF_FAULT_ACT_FATAL;
+
+out_stop:
+	arm_spe_perf_aux_output_end(handle);
+	return ret;
+}
+
+static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
+{
+	struct perf_output_handle *handle = dev;
+	struct perf_event *event = handle->event;
+	enum arm_spe_pmu_buf_fault_action act;
+
+	if (!perf_get_aux(handle))
+		return IRQ_NONE;
+
+	act = arm_spe_pmu_buf_get_fault_act(handle);
+	if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+		return IRQ_NONE;
+
+	/*
+	 * Ensure perf callbacks have completed, which may disable the
+	 * profiling buffer in response to a TRUNCATION flag.
+	 */
+	irq_work_run();
+
+	switch (act) {
+	case SPE_PMU_BUF_FAULT_ACT_FATAL:
+		/*
+		 * If a fatal exception occurred then leaving the profiling
+		 * buffer enabled is a recipe waiting to happen. Since
+		 * fatal faults don't always imply truncation, make sure
+		 * that the profiling buffer is disabled explicitly before
+		 * clearing the syndrome register.
+		 */
+		arm_spe_pmu_disable_and_drain_local();
+		break;
+	case SPE_PMU_BUF_FAULT_ACT_OK:
+		/*
+		 * We handled the fault (the buffer was full), so resume
+		 * profiling as long as we didn't detect truncation.
+		 * PMBPTR might be misaligned, but we'll burn that bridge
+		 * when we get to it.
+		 */
+		if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
+			arm_spe_perf_aux_output_begin(handle, event);
+			isb();
+		}
+		break;
+	case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
+		/* We've seen you before, but GCC has the memory of a sieve. */
+		break;
+	}
+
+	/* The buffer pointers are now sane, so resume profiling. */
+	write_sysreg_s(0, SYS_PMBSR_EL1);
+	return IRQ_HANDLED;
+}
+
+/* Perf callbacks */
+static int arm_spe_pmu_event_init(struct perf_event *event)
+{
+	u64 reg;
+	struct perf_event_attr *attr = &event->attr;
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+
+	/* This is, of course, deeply driver-specific */
+	if (attr->type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu >= 0 &&
+	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
+		return -ENOENT;
+
+	if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
+		return -EOPNOTSUPP;
+
+	if (attr->exclude_idle)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Feedback-directed frequency throttling doesn't work when we
+	 * have a buffer of samples. We'd need to manually count the
+	 * samples in the buffer when it fills up and adjust the event
+	 * count to reflect that. Instead, just force the user to specify
+	 * a sample period.
+	 */
+	if (attr->freq)
+		return -EINVAL;
+
+	reg = arm_spe_event_to_pmsfcr(event);
+	if ((reg & BIT(SYS_PMSFCR_EL1_FE_SHIFT)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
+		return -EOPNOTSUPP;
+
+	if ((reg & BIT(SYS_PMSFCR_EL1_FT_SHIFT)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
+		return -EOPNOTSUPP;
+
+	if ((reg & BIT(SYS_PMSFCR_EL1_FL_SHIFT)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
+		return -EOPNOTSUPP;
+
+	reg = arm_spe_event_to_pmscr(event);
+	if (!capable(CAP_SYS_ADMIN) &&
+	    (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
+		    BIT(SYS_PMSCR_EL1_CX_SHIFT) |
+		    BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
+		return -EACCES;
+
+	return 0;
+}
+
+static void arm_spe_pmu_start(struct perf_event *event, int flags)
+{
+	u64 reg;
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+	hwc->state = 0;
+	arm_spe_perf_aux_output_begin(handle, event);
+	if (hwc->state)
+		return;
+
+	reg = arm_spe_event_to_pmsfcr(event);
+	write_sysreg_s(reg, SYS_PMSFCR_EL1);
+
+	reg = arm_spe_event_to_pmsevfr(event);
+	write_sysreg_s(reg, SYS_PMSEVFR_EL1);
+
+	reg = arm_spe_event_to_pmslatfr(event);
+	write_sysreg_s(reg, SYS_PMSLATFR_EL1);
+
+	if (flags & PERF_EF_RELOAD) {
+		reg = arm_spe_event_to_pmsirr(event);
+		write_sysreg_s(reg, SYS_PMSIRR_EL1);
+		isb();
+		reg = local64_read(&hwc->period_left);
+		write_sysreg_s(reg, SYS_PMSICR_EL1);
+	}
+
+	reg = arm_spe_event_to_pmscr(event);
+	isb();
+	write_sysreg_s(reg, SYS_PMSCR_EL1);
+}
+
+static void arm_spe_pmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+	/* If we're already stopped, then nothing to do */
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	/* Stop all trace generation */
+	arm_spe_pmu_disable_and_drain_local();
+
+	if (flags & PERF_EF_UPDATE) {
+		/*
+		 * If there's a fault pending then ensure we contain it
+		 * to this buffer, since we might be on the context-switch
+		 * path.
+		 */
+		if (perf_get_aux(handle)) {
+			enum arm_spe_pmu_buf_fault_action act;
+
+			act = arm_spe_pmu_buf_get_fault_act(handle);
+			if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+				arm_spe_perf_aux_output_end(handle);
+			else
+				write_sysreg_s(0, SYS_PMBSR_EL1);
+		}
+
+		/*
+		 * This may also contain ECOUNT, but nobody else should
+		 * be looking at period_left, since we forbid frequency
+		 * based sampling.
+		 */
+		local64_set(&hwc->period_left, read_sysreg_s(SYS_PMSICR_EL1));
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+/* .add callback: check the CPU is supported, then start if asked to */
+static int arm_spe_pmu_add(struct perf_event *event, int flags)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu = event->cpu;
+
+	if (cpu == -1)
+		cpu = smp_processor_id();
+	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+		return -ENOENT;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	/* start() leaves PERF_HES_STOPPED set when it could not begin */
+	arm_spe_pmu_start(event, PERF_EF_RELOAD);
+	return (hwc->state & PERF_HES_STOPPED) ? -EINVAL : 0;
+}
+
+static void arm_spe_pmu_del(struct perf_event *event, int flags)
+{
+	arm_spe_pmu_stop(event, PERF_EF_UPDATE);
+}
+
+static void arm_spe_pmu_read(struct perf_event *event)
+{
+}
+
+/*
+ * Map the AUX pages into one vmap()ed region; NULL on bad size or no memory.
+ */
+static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages,
+				   bool snapshot)
+{
+	int i;
+	struct page **pglist;
+	struct arm_spe_pmu_buf *buf;
+
+	/* We need at least two pages for this to work. */
+	if (nr_pages < 2)
+		return NULL;
+
+	/*
+	 * We require an even number of pages for snapshot mode, so that
+	 * we can effectively treat the buffer as consisting of two equal
+	 * parts and give userspace a fighting chance of getting some
+	 * useful data out of it.
+	 */
+	if (snapshot && (nr_pages & 1))
+		return NULL;
+
+	if (cpu == -1)
+		cpu = raw_smp_processor_id();
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf)
+		return NULL;
+
+	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
+	if (!pglist)
+		goto out_free_buf;
+
+	for (i = 0; i < nr_pages; ++i) {
+		pglist[i] = virt_to_page(pages[i]);
+		if (PagePrivate(pglist[i])) {
+			pr_warn("unexpected high-order page for auxbuf!\n");
+			goto out_free_pglist;
+		}
+	}
+
+	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!buf->base)
+		goto out_free_pglist;
+
+	buf->nr_pages	= nr_pages;
+	buf->snapshot	= snapshot;
+
+	kfree(pglist);
+	return buf;
+
+out_free_pglist:
+	kfree(pglist);
+out_free_buf:
+	kfree(buf);
+	return NULL;
+}
+
+static void arm_spe_pmu_free_aux(void *aux)
+{
+	struct arm_spe_pmu_buf *buf = aux;
+
+	vunmap(buf->base);
+	kfree(buf);
+}
+
+/* Initialisation and teardown functions */
+static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
+{
+	static atomic_t pmu_idx = ATOMIC_INIT(-1); /* last index handed out */
+	int idx = atomic_inc_return(&pmu_idx);
+	char *name;
+	struct device *dev = &spe_pmu->pdev->dev;
+
+	spe_pmu->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+		.attr_groups	= arm_spe_pmu_attr_groups,
+		/*
+		 * We hitch a ride on the software context here, so that
+		 * we can support per-task profiling (which is not possible
+		 * with the invalid context as it doesn't get sched callbacks).
+		 * This requires that userspace either uses a dummy event for
+		 * perf_event_open, since the aux buffer is not setup until
+		 * a subsequent mmap, or creates the profiling event in a
+		 * disabled state and explicitly PERF_EVENT_IOC_ENABLEs it
+		 * once the buffer has been created.
+		 */
+		.task_ctx_nr	= perf_sw_context,
+		.event_init	= arm_spe_pmu_event_init,
+		.add		= arm_spe_pmu_add,
+		.del		= arm_spe_pmu_del,
+		.start		= arm_spe_pmu_start,
+		.stop		= arm_spe_pmu_stop,
+		.read		= arm_spe_pmu_read,
+		.setup_aux	= arm_spe_pmu_setup_aux,
+		.free_aux	= arm_spe_pmu_free_aux,
+	};
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
+	if (!name)	/* allocation failed; don't pass a NULL name on */
+		return -ENOMEM;
+	return perf_pmu_register(&spe_pmu->pmu, name, -1);
+}
+
+static void arm_spe_pmu_perf_destroy(struct arm_spe_pmu *spe_pmu)
+{
+	perf_pmu_unregister(&spe_pmu->pmu);
+}
+
+/*
+ * Probe the SPE implementation of the CPU this runs on and record what
+ * it offers in the arm_spe_pmu passed through @info. Nothing is returned:
+ * success is signalled by setting SPE_PMU_FEAT_DEV_PROBED in
+ * spe_pmu->features, and every error path returns before that point
+ * after logging the reason.
+ */
+static void __arm_spe_pmu_dev_probe(void *info)
+{
+	struct arm_spe_pmu *spe_pmu = info;
+	struct device *dev = &spe_pmu->pdev->dev;
+	u64 idr, feats = 0;
+	int val;
+
+	val = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
+						   ID_AA64DFR0_PMSVER_SHIFT);
+	if (val == 0) {
+		dev_err(dev,
+			"unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
+			val, smp_processor_id());
+		return;
+	}
+
+	/* PMBIDR goes first: its P bit tells us whether we have access */
+	idr = read_sysreg_s(SYS_PMBIDR_EL1);
+	if (idr & BIT(SYS_PMBIDR_EL1_P_SHIFT)) {
+		dev_err(dev,
+			"profiling buffer owned by higher exception level\n");
+		return;
+	}
+
+	/* Minimum alignment is 1 << Align; above SZ_2K the probe fails */
+	val = (idr >> SYS_PMBIDR_EL1_ALIGN_SHIFT) & SYS_PMBIDR_EL1_ALIGN_MASK;
+	spe_pmu->align = 1 << val;
+	if (spe_pmu->align > SZ_2K) {
+		dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
+			val, smp_processor_id());
+		return;
+	}
+
+	/* PMSIDR is safe to read now: gather the optional feature bits */
+	idr = read_sysreg_s(SYS_PMSIDR_EL1);
+	if (idr & BIT(SYS_PMSIDR_EL1_FE_SHIFT))
+		feats |= SPE_PMU_FEAT_FILT_EVT;
+	if (idr & BIT(SYS_PMSIDR_EL1_FT_SHIFT))
+		feats |= SPE_PMU_FEAT_FILT_TYP;
+	if (idr & BIT(SYS_PMSIDR_EL1_FL_SHIFT))
+		feats |= SPE_PMU_FEAT_FILT_LAT;
+	if (idr & BIT(SYS_PMSIDR_EL1_ARCHINST_SHIFT))
+		feats |= SPE_PMU_FEAT_ARCH_INST;
+	if (idr & BIT(SYS_PMSIDR_EL1_LDS_SHIFT))
+		feats |= SPE_PMU_FEAT_LDS;
+	if (idr & BIT(SYS_PMSIDR_EL1_ERND_SHIFT))
+		feats |= SPE_PMU_FEAT_ERND;
+	spe_pmu->features |= feats;
+
+	/* Interval is sparsely encoded, so map it by hand; unknowns act as 8 */
+	val = (idr >> SYS_PMSIDR_EL1_INTERVAL_SHIFT) & SYS_PMSIDR_EL1_INTERVAL_MASK;
+	switch (val) {
+	default:
+		dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
+			 val);
+		/* Fallthrough */
+	case 8:
+		spe_pmu->min_period = 4096;
+		break;
+	case 0:
+		spe_pmu->min_period = 256;
+		break;
+	case 2:
+		spe_pmu->min_period = 512;
+		break;
+	case 3:
+		spe_pmu->min_period = 768;
+		break;
+	case 4:
+		spe_pmu->min_period = 1024;
+		break;
+	case 5:
+		spe_pmu->min_period = 1536;
+		break;
+	case 6:
+		spe_pmu->min_period = 2048;
+		break;
+	case 7:
+		spe_pmu->min_period = 3072;
+		break;
+	}
+
+	/* Maximum record size is 1 << MaxSize and must lie in [16, SZ_2K] */
+	val = (idr >> SYS_PMSIDR_EL1_MAXSIZE_SHIFT) & SYS_PMSIDR_EL1_MAXSIZE_MASK;
+	spe_pmu->max_record_sz = 1 << val;
+	if (spe_pmu->max_record_sz < 16 || spe_pmu->max_record_sz > SZ_2K) {
+		dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
+			val, smp_processor_id());
+		return;
+	}
+
+	/* counter_sz is always set to 12; a CountSize other than 2 only warns */
+	val = (idr >> SYS_PMSIDR_EL1_COUNTSIZE_SHIFT) & SYS_PMSIDR_EL1_COUNTSIZE_MASK;
+	if (val != 2)
+		dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
+			 val);
+	spe_pmu->counter_sz = 12;
+
+	dev_info(dev,
+		 "probed for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
+		 cpumask_pr_args(&spe_pmu->supported_cpus),
+		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
+
+	/* Only now may the caller treat the probe as successful */
+	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
+}
+
+/*
+ * Reset the SPE buffer state of the current CPU: call the
+ * disable-and-drain helper, then write zero to PMBPTR_EL1 and PMBSR_EL1,
+ * each write followed by an ISB.
+ */
+static void __arm_spe_pmu_reset_local(void)
+{
+	/*
+	 * This is probably overkill, as we have no idea where we're
+	 * draining any buffered data to...
+	 */
+	arm_spe_pmu_disable_and_drain_local();
+
+	/* Reset the buffer base pointer */
+	write_sysreg_s(0, SYS_PMBPTR_EL1);
+	isb();
+
+	/* Clear any pending management interrupts */
+	write_sysreg_s(0, SYS_PMBSR_EL1);
+	isb();
+}
+
+/*
+ * Per-CPU bring-up for the PMU passed through @info: reset the local SPE
+ * state first, then enable the per-CPU interrupt.
+ */
+static void __arm_spe_pmu_setup_one(void *info)
+{
+	const struct arm_spe_pmu *pmu = info;
+
+	__arm_spe_pmu_reset_local();
+	enable_percpu_irq(pmu->irq, IRQ_TYPE_NONE);
+}
+
+/*
+ * Per-CPU shutdown for the PMU passed through @info: disable the per-CPU
+ * interrupt first, then reset the local SPE state.
+ */
+static void __arm_spe_pmu_stop_one(void *info)
+{
+	const struct arm_spe_pmu *pmu = info;
+
+	disable_percpu_irq(pmu->irq);
+	__arm_spe_pmu_reset_local();
+}
+
+/*
+ * CPU hotplug startup callback: set up SPE on @cpu when it belongs to the
+ * PMU instance that owns @node. Always returns 0.
+ */
+static int arm_spe_pmu_cpu_startup(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_spe_pmu *pmu;
+
+	pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+	if (cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		__arm_spe_pmu_setup_one(pmu);
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback: stop SPE on @cpu when it belongs to the
+ * PMU instance that owns @node. Always returns 0.
+ */
+static int arm_spe_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_spe_pmu *pmu;
+
+	pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+	if (cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		__arm_spe_pmu_stop_one(pmu);
+
+	return 0;
+}
+
+/*
+ * Probe the hardware on one supported CPU, then claim the per-CPU IRQ and
+ * register with CPU hotplug. Returns 0 on success or a negative errno;
+ * the IRQ is released again if the hotplug registration fails.
+ */
+static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
+{
+	int err;
+
+	/* Run the probe on one of the CPUs this PMU instance covers */
+	err = smp_call_function_any(&spe_pmu->supported_cpus,
+				    __arm_spe_pmu_dev_probe, spe_pmu, 1);
+	if (err || !(spe_pmu->features & SPE_PMU_FEAT_DEV_PROBED))
+		return -ENXIO;
+
+	/* Request our PPIs (note that the IRQ is still disabled) */
+	err = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
+				 spe_pmu->handle);
+	if (err)
+		return err;
+
+	/*
+	 * Register the hotplug instance now so no event is missed. This
+	 * also enables the IRQ on supported CPUs that are already up.
+	 */
+	err = cpuhp_state_add_instance(arm_spe_pmu_online,
+				       &spe_pmu->hotplug_node);
+	if (!err)
+		return 0;
+
+	free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+	return err;
+}
+
+/*
+ * Undo arm_spe_pmu_dev_init(): remove the hotplug instance first, then
+ * free the per-CPU IRQ.
+ */
+static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
+{
+	cpuhp_state_remove_instance(arm_spe_pmu_online, &spe_pmu->hotplug_node);
+	free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+}
+
+/* Driver and device probing */
+/*
+ * Look up the device's first IRQ, insist that it is per-CPU and read its
+ * CPU partition into spe_pmu->supported_cpus. On success the IRQ number is
+ * stored in spe_pmu->irq and 0 is returned; otherwise a negative errno.
+ */
+static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
+{
+	struct device *dev = &spe_pmu->pdev->dev;
+	int ppi;
+
+	ppi = platform_get_irq(spe_pmu->pdev, 0);
+	if (ppi < 0) {
+		dev_err(dev, "failed to get IRQ (%d)\n", ppi);
+		return -ENXIO;
+	}
+
+	if (!irq_is_percpu(ppi)) {
+		dev_err(dev, "expected PPI but got SPI (%d)\n", ppi);
+		return -EINVAL;
+	}
+
+	if (irq_get_percpu_devid_partition(ppi, &spe_pmu->supported_cpus)) {
+		dev_err(dev, "failed to get PPI partition (%d)\n", ppi);
+		return -EINVAL;
+	}
+
+	spe_pmu->irq = ppi;
+	return 0;
+}
+
+/* Device-tree compatible strings this driver binds to. */
+static const struct of_device_id arm_spe_pmu_of_match[] = {
+	{ .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
+	{ /* Sentinel */ },
+};
+
+/*
+ * Platform probe: allocate the PMU state and its per-CPU handle, then run
+ * the IRQ, device and perf initialisation steps in that order. A failing
+ * step unwinds whatever was set up before it. Returns 0 or a negative
+ * errno.
+ */
+static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct arm_spe_pmu *spe_pmu;
+	int err;
+
+	spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
+	if (!spe_pmu) {
+		dev_err(dev, "failed to allocate spe_pmu\n");
+		return -ENOMEM;
+	}
+
+	spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
+	if (!spe_pmu->handle)
+		return -ENOMEM;
+
+	spe_pmu->pdev = pdev;
+	platform_set_drvdata(pdev, spe_pmu);
+
+	/* Neither of these two steps leaves anything behind on failure */
+	err = arm_spe_pmu_irq_probe(spe_pmu);
+	if (!err)
+		err = arm_spe_pmu_dev_init(spe_pmu);
+	if (err)
+		goto out_free_handle;
+
+	err = arm_spe_pmu_perf_init(spe_pmu);
+	if (err)
+		goto out_teardown_dev;
+
+	return 0;
+
+out_teardown_dev:
+	arm_spe_pmu_dev_teardown(spe_pmu);
+out_free_handle:
+	/* spe_pmu itself is devm-managed; only the per-CPU handle is ours */
+	free_percpu(spe_pmu->handle);
+	return err;
+}
+
+/*
+ * Platform remove: undo arm_spe_pmu_device_dt_probe() in reverse order
+ * (perf registration, device setup, per-CPU handle). Always returns 0.
+ */
+static int arm_spe_pmu_device_remove(struct platform_device *pdev)
+{
+	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+
+	arm_spe_pmu_perf_destroy(spe_pmu);
+	arm_spe_pmu_dev_teardown(spe_pmu);
+	free_percpu(spe_pmu->handle);
+	return 0;
+}
+
+/* Platform driver glue: matched through the OF table above. */
+static struct platform_driver arm_spe_pmu_driver = {
+	.driver	= {
+		.name		= DRVNAME,
+		.of_match_table	= of_match_ptr(arm_spe_pmu_of_match),
+	},
+	.probe	= arm_spe_pmu_device_dt_probe,
+	.remove	= arm_spe_pmu_device_remove,
+};
+
+/*
+ * Module load: reserve a dynamic CPU hotplug state, remember it in
+ * arm_spe_pmu_online, then register the platform driver. The hotplug
+ * state is released again if driver registration fails.
+ */
+static int __init arm_spe_pmu_init(void)
+{
+	int state, err;
+
+	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
+					arm_spe_pmu_cpu_startup,
+					arm_spe_pmu_cpu_teardown);
+	if (state < 0)
+		return state;
+	arm_spe_pmu_online = state;
+
+	err = platform_driver_register(&arm_spe_pmu_driver);
+	if (!err)
+		return 0;
+
+	cpuhp_remove_multi_state(arm_spe_pmu_online);
+	return err;
+}
+
+/* Module unload: unregister the driver, then release the hotplug state. */
+static void __exit arm_spe_pmu_exit(void)
+{
+	platform_driver_unregister(&arm_spe_pmu_driver);
+	cpuhp_remove_multi_state(arm_spe_pmu_online);
+}
+
+module_init(arm_spe_pmu_init);
+module_exit(arm_spe_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for the ARMv8.2 Statistical Profiling Extension");
+MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
new file mode 100644
index 000000000000..2621d51ae87a
--- /dev/null
+++ b/drivers/perf/hisilicon/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
new file mode 100644
index 000000000000..1b10ea05a914
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -0,0 +1,463 @@
+/*
+ * HiSilicon SoC DDRC uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *         Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* DDRC register definition */
+#define DDRC_PERF_CTRL		0x010
+#define DDRC_FLUX_WR		0x380
+#define DDRC_FLUX_RD		0x384
+#define DDRC_FLUX_WCMD          0x388
+#define DDRC_FLUX_RCMD          0x38c
+#define DDRC_PRE_CMD            0x3c0
+#define DDRC_ACT_CMD            0x3c4
+#define DDRC_BNK_CHG            0x3c8
+#define DDRC_RNK_CHG            0x3cc
+#define DDRC_EVENT_CTRL         0x6C0
+#define DDRC_INT_MASK		0x6c8
+#define DDRC_INT_STATUS		0x6cc
+#define DDRC_INT_CLEAR		0x6d0
+
+/* DDRC has 8-counters */
+#define DDRC_NR_COUNTERS	0x8
+#define DDRC_PERF_CTRL_EN	0x2
+
+/*
+ * The DDRC PMU has eight events, each mapped to its own fixed-purpose
+ * counter, and the counter register offsets are not evenly spaced.
+ * No event type is therefore ever written, and the driver takes the
+ * event code (0 to 7) to be the counter index.
+ *
+ * @hwc is parenthesized so that any pointer expression can be passed.
+ */
+#define GET_DDRC_EVENTID(hwc)	((hwc)->config_base & 0x7)
+
+/*
+ * Counter register offsets, indexed by counter index (== event code).
+ * NOTE(review): events 0x06 and 0x07 are exported to sysfs as rnk_chg and
+ * rw_chg, yet they index DDRC_BNK_CHG and DDRC_RNK_CHG here. Confirm the
+ * offsets and names against the hardware manual.
+ */
+static const u32 ddrc_reg_off[] = {
+	DDRC_FLUX_WR, DDRC_FLUX_RD, DDRC_FLUX_WCMD, DDRC_FLUX_RCMD,
+	DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_BNK_CHG, DDRC_RNK_CHG
+};
+
+/*
+ * Select the counter register offset using the counter index.
+ * The DDRC has no programmable counters; the count is read from the
+ * statistics counter register itself.
+ * @cntr_idx is used as an index into ddrc_reg_off[] without a range check.
+ */
+static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx)
+{
+	return ddrc_reg_off[cntr_idx];
+}
+
+/*
+ * Read the 32-bit statistics register belonging to @hwc's event.
+ * Returns 0, after logging an error, when the index fails validation.
+ */
+static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu,
+				      struct hw_perf_event *hwc)
+{
+	/* The event code doubles as the counter index */
+	u32 cntr = GET_DDRC_EVENTID(hwc);
+
+	if (hisi_uncore_pmu_counter_valid(ddrc_pmu, cntr))
+		return readl(ddrc_pmu->base +
+			     hisi_ddrc_pmu_get_counter_offset(cntr));
+
+	dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", cntr);
+	return 0;
+}
+
+/*
+ * Write the low 32 bits of @val to the statistics register belonging to
+ * @hwc's event. An index that fails validation is logged and ignored.
+ */
+static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu,
+					struct hw_perf_event *hwc, u64 val)
+{
+	u32 cntr = GET_DDRC_EVENTID(hwc);
+	u32 low = (u32)val;
+
+	if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, cntr)) {
+		dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", cntr);
+		return;
+	}
+
+	writel(low, ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(cntr));
+}
+
+/*
+ * Deliberately empty: the DDRC hardware ties every event to a
+ * fixed-purpose counter, so there is no event type to program. The
+ * function exists only to fill the write_evtype slot of the ops table.
+ */
+static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx,
+				       u32 type)
+{
+}
+
+/* Start event counting by setting perf_enable in DDRC_PERF_CTRL. */
+static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu)
+{
+	void __iomem *ctrl = ddrc_pmu->base + DDRC_PERF_CTRL;
+
+	/* Read-modify-write so the other control bits are preserved */
+	writel(readl(ctrl) | DDRC_PERF_CTRL_EN, ctrl);
+}
+
+/* Stop event counting by clearing perf_enable in DDRC_PERF_CTRL. */
+static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu)
+{
+	void __iomem *ctrl = ddrc_pmu->base + DDRC_PERF_CTRL;
+
+	/* Read-modify-write so the other control bits are preserved */
+	writel(readl(ctrl) & ~DDRC_PERF_CTRL_EN, ctrl);
+}
+
+/*
+ * Enable the counter of @hwc's event: set the bit numbered by the event
+ * code (which is also the counter index) in DDRC_EVENT_CTRL.
+ */
+static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
+					 struct hw_perf_event *hwc)
+{
+	void __iomem *ctrl = ddrc_pmu->base + DDRC_EVENT_CTRL;
+
+	writel(readl(ctrl) | (1 << GET_DDRC_EVENTID(hwc)), ctrl);
+}
+
+/*
+ * Disable the counter of @hwc's event: clear the bit numbered by the event
+ * code (which is also the counter index) in DDRC_EVENT_CTRL.
+ */
+static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
+					  struct hw_perf_event *hwc)
+{
+	void __iomem *ctrl = ddrc_pmu->base + DDRC_EVENT_CTRL;
+
+	writel(readl(ctrl) & ~(1 << GET_DDRC_EVENTID(hwc)), ctrl);
+}
+
+/*
+ * Claim the counter for @event. The DDRC PMU uses the event code as the
+ * counter index, so the only possible slot is the one named by the event.
+ * Returns that index, or -EAGAIN when the slot is already marked in use.
+ */
+static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
+{
+	struct hisi_pmu *pmu = to_hisi_pmu(event->pmu);
+	unsigned long *in_use = pmu->pmu_events.used_mask;
+	struct hw_perf_event *hwc = &event->hw;
+	int cntr = GET_DDRC_EVENTID(hwc);
+
+	if (test_bit(cntr, in_use))
+		return -EAGAIN;
+
+	set_bit(cntr, in_use);
+	return cntr;
+}
+
+/*
+ * Unmask the interrupt of @hwc's counter. DDRC_INT_MASK is a mask
+ * register, so the interrupt is enabled by writing 0 to its bit.
+ */
+static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+					     struct hw_perf_event *hwc)
+{
+	void __iomem *mask = ddrc_pmu->base + DDRC_INT_MASK;
+
+	writel(readl(mask) & ~(1 << GET_DDRC_EVENTID(hwc)), mask);
+}
+
+/*
+ * Mask the interrupt of @hwc's counter. DDRC_INT_MASK is a mask
+ * register, so the interrupt is disabled by writing 1 to its bit.
+ */
+static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+					      struct hw_perf_event *hwc)
+{
+	void __iomem *mask = ddrc_pmu->base + DDRC_INT_MASK;
+
+	writel(readl(mask) | (1 << GET_DDRC_EVENTID(hwc)), mask);
+}
+
+/*
+ * Counter overflow interrupt handler. Acknowledges every counter flagged
+ * in DDRC_INT_STATUS and, where an event occupies that counter, runs the
+ * shared update and set-period helpers on it. Returns IRQ_NONE when no
+ * status bit is set, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t hisi_ddrc_pmu_isr(int irq, void *dev_id)
+{
+	struct hisi_pmu *ddrc_pmu = dev_id;
+	unsigned long pending;
+	int cntr;
+
+	pending = readl(ddrc_pmu->base + DDRC_INT_STATUS);
+	if (!pending)
+		return IRQ_NONE;
+
+	/* Only the low DDRC_NR_COUNTERS status bits are examined */
+	for_each_set_bit(cntr, &pending, DDRC_NR_COUNTERS) {
+		struct perf_event *event;
+
+		/* Write 1 to clear the IRQ status flag */
+		writel((1 << cntr), ddrc_pmu->base + DDRC_INT_CLEAR);
+
+		/* A counter without an event needs nothing beyond the ack */
+		event = ddrc_pmu->pmu_events.hw_events[cntr];
+		if (!event)
+			continue;
+
+		hisi_uncore_pmu_event_update(event);
+		hisi_uncore_pmu_set_event_period(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the device's first IRQ and install hisi_ddrc_pmu_isr() on it
+ * with @ddrc_pmu as cookie. On success the IRQ number is stored in
+ * ddrc_pmu->irq and 0 is returned; otherwise a negative errno.
+ */
+static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu,
+				  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq, err;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "DDRC PMU get irq fail; irq:%d\n", irq);
+		return irq;
+	}
+
+	/* devm-managed, so no explicit free_irq() is needed on teardown */
+	err = devm_request_irq(dev, irq, hisi_ddrc_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(dev), ddrc_pmu);
+	if (err < 0) {
+		dev_err(dev, "Fail to request IRQ:%d ret:%d\n", irq, err);
+		return err;
+	}
+
+	ddrc_pmu->irq = irq;
+	return 0;
+}
+
+/* ACPI hardware IDs this driver binds to; the empty entry ends the table. */
+static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
+	{ "HISI0233", },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
+
+/*
+ * Read the firmware properties that identify this DDRC PMU and map its
+ * register window. The PMU is identified by the SCCL_ID (which is in
+ * MPIDR[aff2]) together with the DDRC channel ID.
+ * Returns 0 on success or a negative errno.
+ */
+static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *ddrc_pmu)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+
+	if (device_property_read_u32(dev, "hisilicon,ch-id",
+				     &ddrc_pmu->index_id)) {
+		dev_err(dev, "Can not read ddrc channel-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(dev, "hisilicon,scl-id",
+				     &ddrc_pmu->sccl_id)) {
+		dev_err(dev, "Can not read ddrc sccl-id!\n");
+		return -EINVAL;
+	}
+
+	/* DDRC PMUs only share the same SCCL, so no CCL id is recorded */
+	ddrc_pmu->ccl_id = -1;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ddrc_pmu->base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ddrc_pmu->base)) {
+		dev_err(dev, "ioremap failed for ddrc_pmu resource\n");
+		return PTR_ERR(ddrc_pmu->base);
+	}
+
+	return 0;
+}
+
+/*
+ * sysfs "format" directory: where the event code lives in perf's config.
+ * NOTE(review): bits 0-4 are exposed here while GET_DDRC_EVENTID() keeps
+ * only bits 0-2 -- confirm which width is intended.
+ */
+static struct attribute *hisi_ddrc_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-4"),
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_ddrc_pmu_format_attr,
+};
+
+/* sysfs "events" directory: named events and their event codes. */
+static struct attribute *hisi_ddrc_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(flux_wr,		0x00),
+	HISI_PMU_EVENT_ATTR(flux_rd,		0x01),
+	HISI_PMU_EVENT_ATTR(flux_wcmd,		0x02),
+	HISI_PMU_EVENT_ATTR(flux_rcmd,		0x03),
+	HISI_PMU_EVENT_ATTR(pre_cmd,		0x04),
+	HISI_PMU_EVENT_ATTR(act_cmd,		0x05),
+	HISI_PMU_EVENT_ATTR(rnk_chg,		0x06),
+	HISI_PMU_EVENT_ATTR(rw_chg,		0x07),
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_ddrc_pmu_events_attr,
+};
+
+/* Read-only "cpumask" attribute, rendered by hisi_cpumask_sysfs_show(). */
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
+	.attrs = hisi_ddrc_pmu_cpumask_attrs,
+};
+
+/* All attribute groups handed to perf through struct pmu.attr_groups. */
+static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = {
+	&hisi_ddrc_pmu_format_group,
+	&hisi_ddrc_pmu_events_group,
+	&hisi_ddrc_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * DDRC callbacks for the shared hisi uncore PMU code. write_evtype is an
+ * empty function and get_event_idx is DDRC-specific because the event
+ * code is used directly as the counter index.
+ */
+static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = {
+	.write_evtype           = hisi_ddrc_pmu_write_evtype,
+	.get_event_idx		= hisi_ddrc_pmu_get_event_idx,
+	.start_counters		= hisi_ddrc_pmu_start_counters,
+	.stop_counters		= hisi_ddrc_pmu_stop_counters,
+	.enable_counter		= hisi_ddrc_pmu_enable_counter,
+	.disable_counter	= hisi_ddrc_pmu_disable_counter,
+	.enable_counter_int	= hisi_ddrc_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_ddrc_pmu_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_write_counter,
+	.read_counter		= hisi_ddrc_pmu_read_counter,
+};
+
+/*
+ * Collect the firmware data and the IRQ for @ddrc_pmu, then fill in the
+ * fixed DDRC parameters (8 counters of 32 bits, the DDRC ops table).
+ * Returns 0 or the negative errno of the failing step.
+ */
+static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
+				   struct hisi_pmu *ddrc_pmu)
+{
+	int err = hisi_ddrc_pmu_init_data(pdev, ddrc_pmu);
+
+	if (!err)
+		err = hisi_ddrc_pmu_init_irq(ddrc_pmu, pdev);
+	if (err)
+		return err;
+
+	ddrc_pmu->dev = &pdev->dev;
+	ddrc_pmu->ops = &hisi_uncore_ddrc_ops;
+	ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
+	ddrc_pmu->counter_bits = 32;
+	ddrc_pmu->check_event = 7;
+	ddrc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+/*
+ * Bind one DDRC PMU device: allocate and initialise its state, join the
+ * DDRC CPU hotplug state and register it with perf under the name
+ * "hisi_sccl<sccl_id>_ddrc<index_id>".
+ *
+ * Returns 0 on success or a negative errno. The hotplug instance is
+ * removed again if the perf registration fails; everything else is
+ * devm-managed.
+ */
+static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *ddrc_pmu;
+	char *name;
+	int ret;
+
+	ddrc_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddrc_pmu), GFP_KERNEL);
+	if (!ddrc_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ddrc_pmu);
+
+	ret = hisi_ddrc_pmu_dev_probe(pdev, ddrc_pmu);
+	if (ret)
+		return ret;
+
+	/*
+	 * devm_kasprintf() returns NULL on allocation failure. Build the
+	 * name before touching hotplug so this path needs no unwinding.
+	 */
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u",
+			      ddrc_pmu->sccl_id, ddrc_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+				       &ddrc_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret);
+		return ret;
+	}
+
+	ddrc_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups	= hisi_ddrc_pmu_attr_groups,
+	};
+
+	ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n");
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+					    &ddrc_pmu->node);
+	}
+
+	return ret;
+}
+
+/*
+ * Platform remove: unregister the PMU from perf, then drop its CPU
+ * hotplug instance. Always returns 0.
+ */
+static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+				    &pmu->node);
+	return 0;
+}
+
+/* Platform driver glue: matched through the ACPI table above. */
+static struct platform_driver hisi_ddrc_pmu_driver = {
+	.driver = {
+		.name = "hisi_ddrc_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_ddrc_pmu_acpi_match),
+	},
+	.probe = hisi_ddrc_pmu_probe,
+	.remove = hisi_ddrc_pmu_remove,
+};
+
+/*
+ * Module load: set up the multi-instance DDRC hotplug state with the
+ * shared online/offline callbacks, then register the platform driver.
+ * The hotplug state is removed again if driver registration fails.
+ */
+static int __init hisi_ddrc_pmu_module_init(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+				      "AP_PERF_ARM_HISI_DDRC_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (err) {
+		pr_err("DDRC PMU: setup hotplug, ret = %d\n", err);
+		return err;
+	}
+
+	err = platform_driver_register(&hisi_ddrc_pmu_driver);
+	if (!err)
+		return 0;
+
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+	return err;
+}
+module_init(hisi_ddrc_pmu_module_init);
+
+/* Module unload: unregister the driver, then remove the hotplug state. */
+static void __exit hisi_ddrc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_ddrc_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+}
+module_exit(hisi_ddrc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
new file mode 100644
index 000000000000..443906e0aff3
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -0,0 +1,473 @@
+/*
+ * HiSilicon SoC HHA uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *         Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* HHA register definition */
+#define HHA_INT_MASK		0x0804
+#define HHA_INT_STATUS		0x0808
+#define HHA_INT_CLEAR		0x080C
+#define HHA_PERF_CTRL		0x1E00
+#define HHA_EVENT_CTRL		0x1E04
+#define HHA_EVENT_TYPE0		0x1E80
+/*
+ * Each counter is 48-bits and [48:63] are reserved
+ * which are Read-As-Zero and Writes-Ignored.
+ */
+#define HHA_CNT0_LOWER		0x1F00
+
+/* HHA has 16-counters */
+#define HHA_NR_COUNTERS		0x10
+
+#define HHA_PERF_CTRL_EN	0x1
+#define HHA_EVTYPE_NONE		0xff
+
+/*
+ * Return the register offset of counter @cntr_idx. Counters are 48 bits
+ * wide and their registers are laid out 8 bytes apart, starting at
+ * HHA_CNT0_LOWER.
+ */
+static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx)
+{
+	return HHA_CNT0_LOWER + cntr_idx * 8;
+}
+
+/*
+ * Read counter hwc->idx with a single 64-bit access; like L3C, the top 16
+ * bits are RAZ. Returns 0, after logging an error, when the index fails
+ * validation.
+ */
+static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu,
+				     struct hw_perf_event *hwc)
+{
+	u32 cntr = hwc->idx;
+
+	if (hisi_uncore_pmu_counter_valid(hha_pmu, cntr))
+		return readq(hha_pmu->base +
+			     hisi_hha_pmu_get_counter_offset(cntr));
+
+	dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", cntr);
+	return 0;
+}
+
+/*
+ * Write @val to counter hwc->idx with a single 64-bit access; like L3C,
+ * the top 16 bits are WI. An index that fails validation is logged and
+ * ignored.
+ */
+static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	u32 cntr = hwc->idx;
+
+	if (!hisi_uncore_pmu_counter_valid(hha_pmu, cntr)) {
+		dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", cntr);
+		return;
+	}
+
+	writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(cntr));
+}
+
+/*
+ * Program @type as the event code of counter @idx.
+ *
+ * Event codes are 8 bits wide and packed four to a register, so the 16
+ * hardware counters are served by four HHA_EVENT_TYPEx registers:
+ * counters 0-3 use HHA_EVENT_TYPE0, counters 4-7 the next one, and so on.
+ */
+static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+				      u32 type)
+{
+	u32 reg_off = HHA_EVENT_TYPE0 + 4 * (idx / 4);
+	u32 lane = idx % 4;
+	u32 shift = 8 * lane;
+	u32 evsel;
+
+	/* Replace only this counter's byte, leaving its neighbours alone */
+	evsel = readl(hha_pmu->base + reg_off);
+	evsel &= ~(HHA_EVTYPE_NONE << shift);
+	evsel |= (type << shift);
+	writel(evsel, hha_pmu->base + reg_off);
+}
+
+/*
+ * Start event counting for all enabled counters by setting the
+ * perf_enable bit in HHA_PERF_CTRL.
+ */
+static void hisi_hha_pmu_start_counters(struct hisi_pmu *hha_pmu)
+{
+	void __iomem *ctrl = hha_pmu->base + HHA_PERF_CTRL;
+
+	/* Read-modify-write so the other control bits are preserved */
+	writel(readl(ctrl) | HHA_PERF_CTRL_EN, ctrl);
+}
+
+/*
+ * Stop event counting for all enabled counters by clearing the
+ * perf_enable bit in HHA_PERF_CTRL.
+ */
+static void hisi_hha_pmu_stop_counters(struct hisi_pmu *hha_pmu)
+{
+	void __iomem *ctrl = hha_pmu->base + HHA_PERF_CTRL;
+
+	/* Read-modify-write so the other control bits are preserved */
+	writel(readl(ctrl) & ~(HHA_PERF_CTRL_EN), ctrl);
+}
+
+/* Enable counter hwc->idx by setting its bit in HHA_EVENT_CTRL. */
+static void hisi_hha_pmu_enable_counter(struct hisi_pmu *hha_pmu,
+					struct hw_perf_event *hwc)
+{
+	void __iomem *ctrl = hha_pmu->base + HHA_EVENT_CTRL;
+
+	writel(readl(ctrl) | (1 << hwc->idx), ctrl);
+}
+
+/* Disable counter hwc->idx by clearing its bit in HHA_EVENT_CTRL. */
+static void hisi_hha_pmu_disable_counter(struct hisi_pmu *hha_pmu,
+					 struct hw_perf_event *hwc)
+{
+	void __iomem *ctrl = hha_pmu->base + HHA_EVENT_CTRL;
+
+	writel(readl(ctrl) & ~(1 << hwc->idx), ctrl);
+}
+
+/*
+ * Unmask the interrupt of counter hwc->idx. HHA_INT_MASK is a mask
+ * register, so the interrupt is enabled by writing 0 to its bit.
+ */
+static void hisi_hha_pmu_enable_counter_int(struct hisi_pmu *hha_pmu,
+					    struct hw_perf_event *hwc)
+{
+	void __iomem *mask = hha_pmu->base + HHA_INT_MASK;
+
+	writel(readl(mask) & ~(1 << hwc->idx), mask);
+}
+
+/*
+ * Mask the interrupt of counter hwc->idx. HHA_INT_MASK is a mask
+ * register, so the interrupt is disabled by writing 1 to its bit.
+ */
+static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu,
+					     struct hw_perf_event *hwc)
+{
+	void __iomem *mask = hha_pmu->base + HHA_INT_MASK;
+
+	writel(readl(mask) | (1 << hwc->idx), mask);
+}
+
+/*
+ * Counter overflow interrupt handler. Acknowledges every counter flagged
+ * in HHA_INT_STATUS and, where an event occupies that counter, runs the
+ * shared update and set-period helpers on it. Returns IRQ_NONE when no
+ * status bit is set, IRQ_HANDLED otherwise.
+ */
+static irqreturn_t hisi_hha_pmu_isr(int irq, void *dev_id)
+{
+	struct hisi_pmu *hha_pmu = dev_id;
+	unsigned long pending;
+	int cntr;
+
+	pending = readl(hha_pmu->base + HHA_INT_STATUS);
+	if (!pending)
+		return IRQ_NONE;
+
+	/* Only the low HHA_NR_COUNTERS status bits are examined */
+	for_each_set_bit(cntr, &pending, HHA_NR_COUNTERS) {
+		struct perf_event *event;
+
+		/* Write 1 to clear the IRQ status flag */
+		writel((1 << cntr), hha_pmu->base + HHA_INT_CLEAR);
+
+		/* A counter without an event needs nothing beyond the ack */
+		event = hha_pmu->pmu_events.hw_events[cntr];
+		if (!event)
+			continue;
+
+		hisi_uncore_pmu_event_update(event);
+		hisi_uncore_pmu_set_event_period(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the device's first IRQ and install hisi_hha_pmu_isr() on it
+ * with @hha_pmu as cookie. On success the IRQ number is stored in
+ * hha_pmu->irq and 0 is returned; otherwise a negative errno.
+ */
+static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu,
+				 struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq, err;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "HHA PMU get irq fail; irq:%d\n", irq);
+		return irq;
+	}
+
+	/* devm-managed, so no explicit free_irq() is needed on teardown */
+	err = devm_request_irq(dev, irq, hisi_hha_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(dev), hha_pmu);
+	if (err < 0) {
+		dev_err(dev, "Fail to request IRQ:%d ret:%d\n", irq, err);
+		return err;
+	}
+
+	hha_pmu->irq = irq;
+	return 0;
+}
+
+/* ACPI hardware IDs this driver binds to; the empty entry ends the table. */
+static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = {
+	{ "HISI0243", },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match);
+
+/*
+ * Identify this HHA PMU and map its register window. The index comes
+ * from the device's ACPI _UID object and the SCCL_ID (which is in
+ * MPIDR[aff2]) from the "hisilicon,scl-id" property.
+ * Returns 0 on success or a negative errno.
+ */
+static int hisi_hha_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *hha_pmu)
+{
+	struct device *dev = &pdev->dev;
+	unsigned long long uid;
+	struct resource *mem;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(ACPI_HANDLE(dev), "_UID", NULL, &uid);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	hha_pmu->index_id = uid;
+
+	if (device_property_read_u32(dev, "hisilicon,scl-id",
+				     &hha_pmu->sccl_id)) {
+		dev_err(dev, "Can not read hha sccl-id!\n");
+		return -EINVAL;
+	}
+
+	/* HHA PMUs only share the same SCCL, so no CCL id is recorded */
+	hha_pmu->ccl_id = -1;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hha_pmu->base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(hha_pmu->base)) {
+		dev_err(dev, "ioremap failed for hha_pmu resource\n");
+		return PTR_ERR(hha_pmu->base);
+	}
+
+	return 0;
+}
+
+/*
+ * sysfs "format" directory: the event code occupies bits 0-7 of perf's
+ * config field.
+ */
+static struct attribute *hisi_hha_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_hha_pmu_format_attr,
+};
+
+/*
+ * sysfs "events" directory: named HHA events and their event codes.
+ * Event 0x1d is the DDR write counterpart of rd_ddr_64b and is named
+ * wr_ddr_64b to match its siblings (rd_ddr_64b, rd_ddr_128b, wr_ddr_128b).
+ */
+static struct attribute *hisi_hha_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_ops_num,		0x00),
+	HISI_PMU_EVENT_ATTR(rx_outer,		0x01),
+	HISI_PMU_EVENT_ATTR(rx_sccl,		0x02),
+	HISI_PMU_EVENT_ATTR(rx_ccix,		0x03),
+	HISI_PMU_EVENT_ATTR(rx_wbi,		0x04),
+	HISI_PMU_EVENT_ATTR(rx_wbip,		0x05),
+	HISI_PMU_EVENT_ATTR(rx_wtistash,	0x11),
+	HISI_PMU_EVENT_ATTR(rd_ddr_64b,		0x1c),
+	HISI_PMU_EVENT_ATTR(wr_ddr_64b,		0x1d),
+	HISI_PMU_EVENT_ATTR(rd_ddr_128b,	0x1e),
+	HISI_PMU_EVENT_ATTR(wr_ddr_128b,	0x1f),
+	HISI_PMU_EVENT_ATTR(spill_num,		0x20),
+	HISI_PMU_EVENT_ATTR(spill_success,	0x21),
+	HISI_PMU_EVENT_ATTR(bi_num,		0x23),
+	HISI_PMU_EVENT_ATTR(mediated_num,	0x32),
+	HISI_PMU_EVENT_ATTR(tx_snp_num,		0x33),
+	HISI_PMU_EVENT_ATTR(tx_snp_outer,	0x34),
+	HISI_PMU_EVENT_ATTR(tx_snp_ccix,	0x35),
+	HISI_PMU_EVENT_ATTR(rx_snprspdata,	0x38),
+	HISI_PMU_EVENT_ATTR(rx_snprsp_outer,	0x3c),
+	HISI_PMU_EVENT_ATTR(sdir-lookup,	0x40),
+	HISI_PMU_EVENT_ATTR(edir-lookup,	0x41),
+	HISI_PMU_EVENT_ATTR(sdir-hit,		0x42),
+	HISI_PMU_EVENT_ATTR(edir-hit,		0x43),
+	HISI_PMU_EVENT_ATTR(sdir-home-migrate,	0x4c),
+	HISI_PMU_EVENT_ATTR(edir-home-migrate,  0x4d),
+	NULL,
+};
+
+/* Wraps the event list above as the sysfs "events" directory. */
+static const struct attribute_group hisi_hha_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_hha_pmu_events_attr,
+};
+
+/* Read-only "cpumask" attribute, rendered by hisi_cpumask_sysfs_show(). */
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_hha_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
+	.attrs = hisi_hha_pmu_cpumask_attrs,
+};
+
+/* All attribute groups handed to perf through struct pmu.attr_groups. */
+static const struct attribute_group *hisi_hha_pmu_attr_groups[] = {
+	&hisi_hha_pmu_format_group,
+	&hisi_hha_pmu_events_group,
+	&hisi_hha_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * HHA callbacks for the shared hisi uncore PMU code. Counter allocation
+ * uses the common hisi_uncore_pmu_get_event_idx(); every other hook is
+ * HHA-specific.
+ */
+static const struct hisi_uncore_ops hisi_uncore_hha_ops = {
+	.write_evtype		= hisi_hha_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_hha_pmu_start_counters,
+	.stop_counters		= hisi_hha_pmu_stop_counters,
+	.enable_counter		= hisi_hha_pmu_enable_counter,
+	.disable_counter	= hisi_hha_pmu_disable_counter,
+	.enable_counter_int	= hisi_hha_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_hha_pmu_disable_counter_int,
+	.write_counter		= hisi_hha_pmu_write_counter,
+	.read_counter		= hisi_hha_pmu_read_counter,
+};
+
+/*
+ * Collect the firmware data and the IRQ for @hha_pmu, then fill in the
+ * fixed HHA parameters (16 counters of 48 bits, the HHA ops table).
+ * Returns 0 or the negative errno of the failing step.
+ */
+static int hisi_hha_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *hha_pmu)
+{
+	int err = hisi_hha_pmu_init_data(pdev, hha_pmu);
+
+	if (!err)
+		err = hisi_hha_pmu_init_irq(hha_pmu, pdev);
+	if (err)
+		return err;
+
+	hha_pmu->dev = &pdev->dev;
+	hha_pmu->ops = &hisi_uncore_hha_ops;
+	hha_pmu->num_counters = HHA_NR_COUNTERS;
+	hha_pmu->counter_bits = 48;
+	hha_pmu->check_event = 0x65;
+	hha_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+/*
+ * Bind one HHA PMU device: allocate and initialise its state, join the
+ * HHA CPU hotplug state and register it with perf under the name
+ * "hisi_sccl<sccl_id>_hha<index_id>".
+ *
+ * Returns 0 on success or a negative errno. The hotplug instance is
+ * removed again if the perf registration fails; everything else is
+ * devm-managed.
+ */
+static int hisi_hha_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *hha_pmu;
+	char *name;
+	int ret;
+
+	hha_pmu = devm_kzalloc(&pdev->dev, sizeof(*hha_pmu), GFP_KERNEL);
+	if (!hha_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, hha_pmu);
+
+	ret = hisi_hha_pmu_dev_probe(pdev, hha_pmu);
+	if (ret)
+		return ret;
+
+	/*
+	 * devm_kasprintf() returns NULL on allocation failure. Build the
+	 * name before touching hotplug so this path needs no unwinding.
+	 */
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
+			      hha_pmu->sccl_id, hha_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+				       &hha_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	hha_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups	= hisi_hha_pmu_attr_groups,
+	};
+
+	ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(hha_pmu->dev, "HHA PMU register failed!\n");
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+					    &hha_pmu->node);
+	}
+
+	return ret;
+}
+
+/*
+ * Platform remove: unregister the PMU from perf, then drop its CPU
+ * hotplug instance. Always returns 0.
+ */
+static int hisi_hha_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+				    &pmu->node);
+	return 0;
+}
+
+/* Platform driver glue: matched through the ACPI table above. */
+static struct platform_driver hisi_hha_pmu_driver = {
+	.driver = {
+		.name = "hisi_hha_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_hha_pmu_acpi_match),
+	},
+	.probe = hisi_hha_pmu_probe,
+	.remove = hisi_hha_pmu_remove,
+};
+
+/*
+ * Module load: set up the multi-instance HHA hotplug state with the
+ * shared online/offline callbacks, then register the platform driver.
+ * The hotplug state is removed again if driver registration fails.
+ */
+static int __init hisi_hha_pmu_module_init(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+				      "AP_PERF_ARM_HISI_HHA_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (err) {
+		pr_err("HHA PMU: Error setup hotplug, ret = %d;\n", err);
+		return err;
+	}
+
+	err = platform_driver_register(&hisi_hha_pmu_driver);
+	if (!err)
+		return 0;
+
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+	return err;
+}
+module_init(hisi_hha_pmu_module_init);
+
+/* Module unload: unregister the driver, then remove the hotplug state. */
+static void __exit hisi_hha_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_hha_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+}
+module_exit(hisi_hha_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
new file mode 100644
index 000000000000..0bde5d919b2e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -0,0 +1,463 @@
+/*
+ * HiSilicon SoC L3C uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* L3C register definition */
+#define L3C_PERF_CTRL		0x0408
+#define L3C_INT_MASK		0x0800
+#define L3C_INT_STATUS		0x0808
+#define L3C_INT_CLEAR		0x080c
+#define L3C_EVENT_CTRL	        0x1c00
+#define L3C_EVENT_TYPE0		0x1d00
+/*
+ * Each counter is 48 bits wide; bits [63:48] are reserved and are
+ * Read-As-Zero and Writes-Ignored.
+ */
+#define L3C_CNTR0_LOWER		0x1e00
+
+/* L3C has 8-counters */
+#define L3C_NR_COUNTERS		0x8
+
+#define L3C_PERF_CTRL_EN	0x20000
+#define L3C_EVTYPE_NONE		0xff
+
+/*
+ * Map a counter index to the register offset of that counter.
+ */
+static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
+{
+	return L3C_CNTR0_LOWER + cntr_idx * 8;
+}
+
+static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
+				     struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+
+	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
+		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
+		return 0;
+	}
+
+	/* 64-bit read; the reserved upper 16 bits are Read-As-Zero */
+	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+}
+
+static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	u32 idx = hwc->idx;
+
+	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
+		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
+		return;
+	}
+
+	/* 64-bit write; the reserved upper 16 bits are Writes-Ignored */
+	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+}
+
+static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
+				      u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register (L3C_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Each event code is 8 bits wide: counters 0-3 are programmed via
+	 * L3C_EVENT_TYPE0 and counters 4-7 via L3C_EVENT_TYPE1, one byte
+	 * per counter.
+	 */
+	reg = L3C_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Replace the counter's byte; the cast keeps 0xff << 24 unsigned */
+	val = readl(l3c_pmu->base + reg);
+	val &= ~((u32)L3C_EVTYPE_NONE << shift);
+	val |= (type << shift);
+	writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
+{
+	void __iomem *perf_ctrl = l3c_pmu->base + L3C_PERF_CTRL;
+	u32 ctrl;
+
+	/*
+	 * Global enable: setting L3C_PERF_CTRL_EN starts counting for
+	 * all enabled counters.
+	 */
+	ctrl = readl(perf_ctrl) | L3C_PERF_CTRL_EN;
+	writel(ctrl, perf_ctrl);
+}
+
+static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
+{
+	void __iomem *perf_ctrl = l3c_pmu->base + L3C_PERF_CTRL;
+	u32 ctrl;
+
+	/*
+	 * Global disable: clearing L3C_PERF_CTRL_EN stops counting for
+	 * all enabled counters.
+	 */
+	ctrl = readl(perf_ctrl) & ~L3C_PERF_CTRL_EN;
+	writel(ctrl, perf_ctrl);
+}
+
+static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
+					struct hw_perf_event *hwc)
+{
+	void __iomem *event_ctrl = l3c_pmu->base + L3C_EVENT_CTRL;
+	u32 ctrl = readl(event_ctrl);
+
+	/* Set the enable bit selected by the counter index */
+	ctrl |= 1 << hwc->idx;
+	writel(ctrl, event_ctrl);
+}
+
+static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
+					 struct hw_perf_event *hwc)
+{
+	void __iomem *event_ctrl = l3c_pmu->base + L3C_EVENT_CTRL;
+	u32 ctrl = readl(event_ctrl);
+
+	/* Clear the enable bit selected by the counter index */
+	ctrl &= ~(1 << hwc->idx);
+	writel(ctrl, event_ctrl);
+}
+
+static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
+					    struct hw_perf_event *hwc)
+{
+	void __iomem *int_mask = l3c_pmu->base + L3C_INT_MASK;
+	u32 mask = readl(int_mask);
+
+	/* A mask bit of 0 enables the interrupt of that counter */
+	mask &= ~(1 << hwc->idx);
+	writel(mask, int_mask);
+}
+
+static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
+					     struct hw_perf_event *hwc)
+{
+	void __iomem *int_mask = l3c_pmu->base + L3C_INT_MASK;
+	u32 mask = readl(int_mask);
+
+	/* A mask bit of 1 masks the interrupt of that counter */
+	mask |= 1 << hwc->idx;
+	writel(mask, int_mask);
+}
+
+static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id)
+{
+	struct hisi_pmu *l3c_pmu = dev_id;
+	struct perf_event *event;
+	unsigned long overflown;
+	int idx;
+
+	/* L3C_INT_STATUS has one bit per counter; zero means not our IRQ */
+	overflown = readl(l3c_pmu->base + L3C_INT_STATUS);
+	if (!overflown)
+		return IRQ_NONE;
+
+	/*
+	 * For every counter flagged in the status word: clear its flag,
+	 * fold its count into the perf event and reload its start value.
+	 */
+	for_each_set_bit(idx, &overflown, L3C_NR_COUNTERS) {
+		/* Write 1 to clear the IRQ status flag */
+		writel((1 << idx), l3c_pmu->base + L3C_INT_CLEAR);
+
+		/* Skip counters that have no perf event attached */
+		event = l3c_pmu->pmu_events.hw_events[idx];
+		if (!event)
+			continue;
+
+		hisi_uncore_pmu_event_update(event);
+		hisi_uncore_pmu_set_event_period(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
+				 struct platform_device *pdev)
+{
+	int irq, ret;
+
+	/* The PMU interrupt is the first IRQ resource of the device */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "L3C PMU get irq fail; irq:%d\n", irq);
+		return irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(&pdev->dev), l3c_pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Fail to request IRQ:%d ret:%d\n", irq, ret);
+		return ret;
+	}
+
+	l3c_pmu->irq = irq;	/* kept for later irq_set_affinity() calls */
+
+	return 0;
+}
+
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+	{ "HISI0213", },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
+static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	unsigned long long id;
+	struct resource *res;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+				       "_UID", NULL, &id);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	l3c_pmu->index_id = id;
+
+	/*
+	 * SCCL_ID and CCL_ID tie the L3C PMU to its CPUs: they are matched
+	 * against MPIDR[aff2]/[aff1], or [aff3]/[aff2] with multi-threading.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &l3c_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
+				     &l3c_pmu->ccl_id)) {
+		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(l3c_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
+		return PTR_ERR(l3c_pmu->base);
+	}
+
+	return 0;
+}
+
+static struct attribute *hisi_l3c_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),
+	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
+	HISI_PMU_EVENT_ATTR(rd_hit_cpipe,	0x02),
+	HISI_PMU_EVENT_ATTR(wr_hit_cpipe,	0x03),
+	HISI_PMU_EVENT_ATTR(victim_num,		0x04),
+	HISI_PMU_EVENT_ATTR(rd_spipe,		0x20),
+	HISI_PMU_EVENT_ATTR(wr_spipe,		0x21),
+	HISI_PMU_EVENT_ATTR(rd_hit_spipe,	0x22),
+	HISI_PMU_EVENT_ATTR(wr_hit_spipe,	0x23),
+	HISI_PMU_EVENT_ATTR(back_invalid,	0x29),
+	HISI_PMU_EVENT_ATTR(retry_cpu,		0x40),
+	HISI_PMU_EVENT_ATTR(retry_ring,		0x41),
+	HISI_PMU_EVENT_ATTR(prefetch_drop,	0x42),
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
+	.attrs = hisi_l3c_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
+	&hisi_l3c_pmu_format_group,
+	&hisi_l3c_pmu_events_group,
+	&hisi_l3c_pmu_cpumask_attr_group,
+	NULL,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
+	.write_evtype		= hisi_l3c_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_l3c_pmu_start_counters,
+	.stop_counters		= hisi_l3c_pmu_stop_counters,
+	.enable_counter		= hisi_l3c_pmu_enable_counter,
+	.disable_counter	= hisi_l3c_pmu_disable_counter,
+	.enable_counter_int	= hisi_l3c_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_l3c_pmu_disable_counter_int,
+	.write_counter		= hisi_l3c_pmu_write_counter,
+	.read_counter		= hisi_l3c_pmu_read_counter,
+};
+
+static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	int ret;
+
+	ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_l3c_pmu_init_irq(l3c_pmu, pdev);
+	if (ret)
+		return ret;
+
+	l3c_pmu->num_counters = L3C_NR_COUNTERS;
+	l3c_pmu->counter_bits = 48;
+	l3c_pmu->ops = &hisi_uncore_l3c_ops;
+	l3c_pmu->dev = &pdev->dev;
+	l3c_pmu->on_cpu = -1;	/* no owner until a CPU comes online */
+	l3c_pmu->check_event = 0x59;	/* highest accepted event code */
+
+	return 0;
+}
+
+static int hisi_l3c_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *l3c_pmu;
+	char *name;
+	int ret;
+
+	l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
+	if (!l3c_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, l3c_pmu);
+	ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
+			      l3c_pmu->sccl_id, l3c_pmu->index_id);
+	if (!name)
+		return -ENOMEM;	/* before hotplug setup: nothing to undo */
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				       &l3c_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+	l3c_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups	= hisi_l3c_pmu_attr_groups,
+	};
+	ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+					    &l3c_pmu->node);
+	}
+
+	return ret;
+}
+
+static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&l3c_pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				    &l3c_pmu->node);
+
+	return 0;
+}
+
+static struct platform_driver hisi_l3c_pmu_driver = {
+	.driver = {
+		.name = "hisi_l3c_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_l3c_pmu_acpi_match),
+	},
+	.probe = hisi_l3c_pmu_probe,
+	.remove = hisi_l3c_pmu_remove,
+};
+
+static int __init hisi_l3c_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				      "AP_PERF_ARM_HISI_L3_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_l3c_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+
+	return ret;
+}
+module_init(hisi_l3c_pmu_module_init);
+
+static void __exit hisi_l3c_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_l3c_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+}
+module_exit(hisi_l3c_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
new file mode 100644
index 000000000000..7ed24b954422
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -0,0 +1,447 @@
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+
+#include <asm/local64.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+
+/*
+ * PMU format attributes
+ */
+ssize_t hisi_format_sysfs_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(buf, "%s\n", (char *)eattr->var);
+}
+
+/*
+ * PMU event attributes
+ */
+ssize_t hisi_event_sysfs_show(struct device *dev,
+			      struct device_attribute *attr, char *page)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+/*
+ * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
+ */
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+	return sprintf(buf, "%d\n", hisi_pmu->on_cpu);
+}
+
+static bool hisi_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	/* Start at one: the event being validated needs a counter itself */
+	int counters = 1;
+
+	if (!is_software_event(leader)) {
+		/*
+		 * We must NOT create groups containing mixed PMUs, although
+		 * software events are acceptable
+		 */
+		if (leader->pmu != event->pmu)
+			return false;
+
+		/* Count the leader too, unless the event is the leader */
+		if (leader != event)
+			counters++;
+	}
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			    group_entry) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return false;
+		/* Increment counter for each sibling */
+		counters++;
+	}
+
+	/* The group can not count events more than the counters in the HW */
+	return counters <= hisi_pmu->num_counters;
+}
+
+int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
+{
+	return !(idx < 0 || idx >= hisi_pmu->num_counters);
+}
+
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	unsigned long *used_mask = hisi_pmu->pmu_events.used_mask;
+	u32 num_counters = hisi_pmu->num_counters;
+	int idx;
+
+	idx = find_first_zero_bit(used_mask, num_counters);
+	if (idx == num_counters)	/* every counter is already in use */
+		return -EAGAIN;
+
+	set_bit(idx, used_mask);	/* claim the counter for this event */
+
+	return idx;
+}
+
+static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
+{
+	if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
+		dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx);
+		return;
+	}
+
+	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
+}
+
+int hisi_uncore_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hisi_pmu *hisi_pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * We do not support sampling as the counters are all
+	 * shared by all CPU cores in a CPU die(SCCL). Also we
+	 * do not support attach to a task(per-process mode)
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	/* The counters cannot honour any of the exclude_* filters */
+	if (event->attr.exclude_user	||
+	    event->attr.exclude_kernel	||
+	    event->attr.exclude_host	||
+	    event->attr.exclude_guest	||
+	    event->attr.exclude_hv	||
+	    event->attr.exclude_idle)
+		return -EINVAL;
+
+	/*
+	 * The uncore counters are not specific to any CPU, so events
+	 * without a CPU (event->cpu < 0, per-task) cannot be supported.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/*
+	 * Validate that the events in the group do not exceed the
+	 * available counters in hardware.
+	 */
+	if (!hisi_validate_event_group(event))
+		return -EINVAL;
+
+	hisi_pmu = to_hisi_pmu(event->pmu);
+	if (event->attr.config > hisi_pmu->check_event)
+		return -EINVAL;
+
+	if (hisi_pmu->on_cpu == -1)	/* no CPU currently owns this PMU */
+		return -EINVAL;
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= event->attr.config;
+
+	/* Enforce to use the same CPU for all events in this PMU */
+	event->cpu = hisi_pmu->on_cpu;
+
+	return 0;
+}
+
+/*
+ * Set the counter to count the event that we're interested in,
+ * and enable interrupt and counter.
+ */
+static void hisi_uncore_pmu_enable_event(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
+				    HISI_GET_EVENTID(event));
+
+	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
+	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
+}
+
+/*
+ * Disable counter and interrupt.
+ */
+static void hisi_uncore_pmu_disable_event(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
+	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
+}
+
+void hisi_uncore_pmu_set_event_period(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * The HiSilicon PMU counters are 32 or 48 bits wide, depending on
+	 * the PMU. Start each counter at 2^(counter_bits - 1) to account
+	 * for extreme interrupt latency: we can then hopefully handle the
+	 * overflow interrupt before another 2^(counter_bits - 1) events
+	 * occur and the counter overtakes its previous value.
+	 */
+	u64 val = BIT_ULL(hisi_pmu->counter_bits - 1);
+
+	local64_set(&hwc->prev_count, val);
+	/* Write start value to the hardware event counter */
+	hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
+}
+
+void hisi_uncore_pmu_event_update(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+	do {
+		/* Re-read until prev_count is swapped without interference */
+		new_raw_count = hisi_pmu->ops->read_counter(hisi_pmu, hwc);
+		prev_raw_count = local64_read(&hwc->prev_count);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+	/*
+	 * Mask the delta to the counter width so a wrap-around is handled
+	 */
+	delta = (new_raw_count - prev_raw_count) &
+		HISI_MAX_PERIOD(hisi_pmu->counter_bits);
+	local64_add(delta, &event->count);
+}
+
+void hisi_uncore_pmu_start(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+	hisi_uncore_pmu_set_event_period(event);
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hwc->prev_count);
+
+		hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count);
+	}
+
+	hisi_uncore_pmu_enable_event(event);
+	perf_event_update_userpage(event);
+}
+
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_uncore_pmu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/* Read hardware counter and update the perf counter statistics */
+	hisi_uncore_pmu_event_update(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+int hisi_uncore_pmu_add(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	/* Get an available counter index for counting */
+	idx = hisi_pmu->ops->get_event_idx(event);
+	if (idx < 0)
+		return idx;
+
+	event->hw.idx = idx;
+	hisi_pmu->pmu_events.hw_events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		hisi_uncore_pmu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+void hisi_uncore_pmu_del(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_uncore_pmu_stop(event, PERF_EF_UPDATE);
+	hisi_uncore_pmu_clear_event_idx(hisi_pmu, hwc->idx);
+	perf_event_update_userpage(event);
+	hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
+}
+
+void hisi_uncore_pmu_read(struct perf_event *event)
+{
+	/* Read hardware counter and update the perf counter statistics */
+	hisi_uncore_pmu_event_update(event);
+}
+
+void hisi_uncore_pmu_enable(struct pmu *pmu)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+	int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
+				    hisi_pmu->num_counters);
+
+	if (!enabled)
+		return;
+
+	hisi_pmu->ops->start_counters(hisi_pmu);
+}
+
+void hisi_uncore_pmu_disable(struct pmu *pmu)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	hisi_pmu->ops->stop_counters(hisi_pmu);
+}
+
+/*
+ * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
+ * If multi-threading is supported, SCCL_ID is in MPIDR[aff3] and CCL_ID
+ * is in MPIDR[aff2]; if not, SCCL_ID is in MPIDR[aff2] and CCL_ID is
+ * in MPIDR[aff1]. If this changes in future, this shall be updated.
+ */
+static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
+{
+	u64 mpidr = read_cpuid_mpidr();
+
+	if (mpidr & MPIDR_MT_BITMASK) {
+		if (sccl_id)
+			*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+		if (ccl_id)
+			*ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+	} else {
+		if (sccl_id)
+			*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+		if (ccl_id)
+			*ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	}
+}
+
+/*
+ * Check whether the CPU is associated with this uncore PMU
+ */
+static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
+{
+	int sccl_id, ccl_id;
+
+	if (hisi_pmu->ccl_id == -1) {
+		/* If CCL_ID is -1, the PMU only shares the same SCCL */
+		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
+
+		return sccl_id == hisi_pmu->sccl_id;
+	}
+
+	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
+
+	return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+}
+
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+						     node);
+
+	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+		return 0;
+
+	cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
+
+	/* If another CPU is already managing this PMU, simply return. */
+	if (hisi_pmu->on_cpu != -1)
+		return 0;
+
+	/* Use this CPU in cpumask for event counting */
+	hisi_pmu->on_cpu = cpu;
+
+	/* Overflow interrupt also should use the same CPU */
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+
+	return 0;
+}
+
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+						     node);
+	cpumask_t pmu_online_cpus;
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
+		return 0;
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (hisi_pmu->on_cpu != cpu)
+		return 0;
+
+	/* Give up ownership of the PMU */
+	hisi_pmu->on_cpu = -1;
+
+	/* Choose a new CPU to migrate ownership of the PMU to */
+	cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
+		    cpu_online_mask);
+	target = cpumask_any_but(&pmu_online_cpus, cpu);
+	if (target >= nr_cpu_ids)	/* no other associated CPU online */
+		return 0;
+
+	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
+	/* Use this CPU for event counting */
+	hisi_pmu->on_cpu = target;
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+
+	return 0;
+}
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
new file mode 100644
index 000000000000..f21226a0e9c6
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -0,0 +1,102 @@
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __HISI_UNCORE_PMU_H__
+#define __HISI_UNCORE_PMU_H__
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "hisi_pmu: " fmt
+
+#define HISI_MAX_COUNTERS 0x10
+#define to_hisi_pmu(p)	(container_of(p, struct hisi_pmu, pmu))
+
+#define HISI_PMU_ATTR(_name, _func, _config)				\
+	(&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+	})[0].attr.attr)
+
+#define HISI_PMU_FORMAT_ATTR(_name, _config)		\
+	HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
+#define HISI_PMU_EVENT_ATTR(_name, _config)		\
+	HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
+
+struct hisi_pmu;
+
+struct hisi_uncore_ops {
+	void (*write_evtype)(struct hisi_pmu *, int, u32);
+	int (*get_event_idx)(struct perf_event *);
+	u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*write_counter)(struct hisi_pmu *, struct hw_perf_event *, u64);
+	void (*enable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*disable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*enable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*start_counters)(struct hisi_pmu *);
+	void (*stop_counters)(struct hisi_pmu *);
+};
+
+struct hisi_pmu_hwevents {
+	struct perf_event *hw_events[HISI_MAX_COUNTERS];
+	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+};
+
+/* Generic pmu struct for different pmu types */
+struct hisi_pmu {
+	struct pmu pmu;
+	const struct hisi_uncore_ops *ops;
+	struct hisi_pmu_hwevents pmu_events;
+	/* associated_cpus: All CPUs associated with the PMU */
+	cpumask_t associated_cpus;
+	/* CPU used for counting */
+	int on_cpu;
+	int irq;
+	struct device *dev;
+	struct hlist_node node;
+	int sccl_id;
+	int ccl_id;
+	void __iomem *base;
+	/* the ID of the PMU modules */
+	u32 index_id;
+	int num_counters;
+	int counter_bits;
+	/* check event code range */
+	int check_event;
+};
+
+int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
+void hisi_uncore_pmu_read(struct perf_event *event);
+int hisi_uncore_pmu_add(struct perf_event *event, int flags);
+void hisi_uncore_pmu_del(struct perf_event *event, int flags);
+void hisi_uncore_pmu_start(struct perf_event *event, int flags);
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags);
+void hisi_uncore_pmu_set_event_period(struct perf_event *event);
+void hisi_uncore_pmu_event_update(struct perf_event *event);
+int hisi_uncore_pmu_event_init(struct perf_event *event);
+void hisi_uncore_pmu_enable(struct pmu *pmu);
+void hisi_uncore_pmu_disable(struct pmu *pmu);
+ssize_t hisi_event_sysfs_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+ssize_t hisi_format_sysfs_show(struct device *dev,
+			       struct device_attribute *attr, char *buf);
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *buf);
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
+#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index b242cce10468..4fdc8486a8e4 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -92,6 +92,21 @@
 
 #define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
 
+/*
+ * Events
+ */
+#define L2_EVENT_CYCLES                    0xfe
+#define L2_EVENT_DCACHE_OPS                0x400
+#define L2_EVENT_ICACHE_OPS                0x401
+#define L2_EVENT_TLBI                      0x402
+#define L2_EVENT_BARRIERS                  0x403
+#define L2_EVENT_TOTAL_READS               0x405
+#define L2_EVENT_TOTAL_WRITES              0x406
+#define L2_EVENT_TOTAL_REQUESTS            0x407
+#define L2_EVENT_LDREX                     0x420
+#define L2_EVENT_STREX                     0x421
+#define L2_EVENT_CLREX                     0x422
+
 static DEFINE_RAW_SPINLOCK(l2_access_lock);
 
 /**
@@ -700,9 +715,12 @@ static struct attribute_group l2_cache_pmu_cpumask_group = {
 /* CCG format for perf RAW codes. */
 PMU_FORMAT_ATTR(l2_code,   "config:4-11");
 PMU_FORMAT_ATTR(l2_group,  "config:0-3");
+PMU_FORMAT_ATTR(event,     "config:0-11");
+
 static struct attribute *l2_cache_pmu_formats[] = {
 	&format_attr_l2_code.attr,
 	&format_attr_l2_group.attr,
+	&format_attr_event.attr,
 	NULL,
 };
 
@@ -711,9 +729,45 @@ static struct attribute_group l2_cache_pmu_format_group = {
 	.attrs = l2_cache_pmu_formats,
 };
 
+static ssize_t l2cache_pmu_event_show(struct device *dev,
+				      struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L2CACHE_EVENT_ATTR(_name, _id)					     \
+	(&((struct perf_pmu_events_attr[]) {				     \
+		{ .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
+		  .id = _id, }						     \
+	})[0].attr.attr)
+
+static struct attribute *l2_cache_pmu_events[] = {
+	L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
+	L2CACHE_EVENT_ATTR(dcache-ops, L2_EVENT_DCACHE_OPS),
+	L2CACHE_EVENT_ATTR(icache-ops, L2_EVENT_ICACHE_OPS),
+	L2CACHE_EVENT_ATTR(tlbi, L2_EVENT_TLBI),
+	L2CACHE_EVENT_ATTR(barriers, L2_EVENT_BARRIERS),
+	L2CACHE_EVENT_ATTR(total-reads, L2_EVENT_TOTAL_READS),
+	L2CACHE_EVENT_ATTR(total-writes, L2_EVENT_TOTAL_WRITES),
+	L2CACHE_EVENT_ATTR(total-requests, L2_EVENT_TOTAL_REQUESTS),
+	L2CACHE_EVENT_ATTR(ldrex, L2_EVENT_LDREX),
+	L2CACHE_EVENT_ATTR(strex, L2_EVENT_STREX),
+	L2CACHE_EVENT_ATTR(clrex, L2_EVENT_CLREX),
+	NULL
+};
+
+static struct attribute_group l2_cache_pmu_events_group = {
+	.name = "events",
+	.attrs = l2_cache_pmu_events,
+};
+
 static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
 	&l2_cache_pmu_format_group,
 	&l2_cache_pmu_cpumask_group,
+	&l2_cache_pmu_events_group,
 	NULL,
 };
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 73b01e474fdc..c697882b3aba 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1699,7 +1699,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 				 long signr, size_t *total)
 {
 	unsigned int i;
-	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
+	unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
 
 	/*
 	 * NT_PRSTATUS is the one special case, because the regset data
@@ -1708,11 +1708,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	 * We assume that regset 0 is NT_PRSTATUS.
 	 */
 	fill_prstatus(&t->prstatus, t->task, signr);
-	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
 				    &t->prstatus.pr_reg, NULL);
 
 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
-		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
+		  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
 	*total += notesize(&t->notes[0]);
 
 	do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1728,7 +1728,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
-			size_t size = regset->n * regset->size;
+			size_t size = regset_size(t->task, regset);
 			void *data = kmalloc(size, GFP_KERNEL);
 			if (unlikely(!data))
 				return 0;
@@ -1743,7 +1743,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 						  size, data);
 				else {
 					SET_PR_FPVALID(&t->prstatus,
-							1, regset_size);
+							1, regset0_size);
 					fill_note(&t->notes[i], "CORE",
 						  NT_PRFPREG, size, data);
 				}
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cc805b72994a..349e5957c949 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -67,7 +67,9 @@ enum arch_timer_spi_nr {
 #define ARCH_TIMER_USR_VT_ACCESS_EN	(1 << 8) /* virtual timer registers */
 #define ARCH_TIMER_USR_PT_ACCESS_EN	(1 << 9) /* physical timer registers */
 
-#define ARCH_TIMER_EVT_STREAM_FREQ	10000	/* 100us */
+#define ARCH_TIMER_EVT_STREAM_PERIOD_US	100
+#define ARCH_TIMER_EVT_STREAM_FREQ	/* events per second */	\
+	(USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)
 
 struct arch_timer_kvm_info {
 	struct timecounter timecounter;
@@ -93,6 +95,7 @@ struct arch_timer_mem {
 extern u32 arch_timer_get_rate(void);
 extern u64 (*arch_timer_read_counter)(void);
 extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
+extern bool arch_timer_evtstrm_available(void);
 
 #else
 
@@ -106,6 +109,11 @@ static inline u64 arch_timer_read_counter(void)
 	return 0;
 }
 
+static inline bool arch_timer_evtstrm_available(void)
+{
+	return false;	/* #else stub: never reports an event stream */
+}
+
 #endif
 
 #endif
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 8d3f0bf80379..2f7a29242b87 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -49,8 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
 static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
 				  u64 *size) { }
-static inline
-const struct iommu_ops *iort_iommu_configure(struct device *dev)
+static inline const struct iommu_ops *iort_iommu_configure(
+				      struct device *dev)
 { return NULL; }
 #endif
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ec32c4c5eb30..201ab7267986 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -155,6 +155,9 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 60e3100b0809..dd418955962b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -246,6 +246,14 @@ static inline int irq_is_percpu(unsigned int irq)
 	return desc->status_use_accessors & IRQ_PER_CPU;
 }
 
+/* Test IRQ_PER_CPU_DEVID on @irq's descriptor, which is not NULL-checked. */
+static inline int irq_is_percpu_devid(unsigned int irq)
+{
+	const struct irq_desc *d = irq_to_desc(irq);
+
+	return d->status_use_accessors & IRQ_PER_CPU_DEVID;
+}
+
 static inline void
 irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
 {
diff --git a/include/linux/regset.h b/include/linux/regset.h
index 8e0c9febf495..494cedaafdf2 100644
--- a/include/linux/regset.h
+++ b/include/linux/regset.h
@@ -107,6 +107,28 @@ typedef int user_regset_writeback_fn(struct task_struct *target,
 				     int immediate);
 
 /**
+ * user_regset_get_size_fn - type of @get_size function in &struct user_regset
+ * @target:	thread being examined
+ * @regset:	regset being examined
+ *
+ * This call is optional; usually the pointer is %NULL.
+ *
+ * When provided, this function must return the current size of regset
+ * data, as observed by the @get function in &struct user_regset.  The
+ * value returned must be a multiple of @size.  The returned size is
+ * required to be valid only until the next time (if any) @regset is
+ * modified for @target.
+ *
+ * This function is intended for dynamically sized regsets.  A regset
+ * that is statically sized does not need to implement it.
+ *
+ * This function should not be called directly: instead, callers should
+ * call regset_size() to determine the current size of a regset.
+ */
+typedef unsigned int user_regset_get_size_fn(struct task_struct *target,
+					     const struct user_regset *regset);
+
+/**
  * struct user_regset - accessible thread CPU state
  * @n:			Number of slots (registers).
  * @size:		Size in bytes of a slot (register).
@@ -117,19 +139,33 @@ typedef int user_regset_writeback_fn(struct task_struct *target,
  * @set:		Function to store values.
  * @active:		Function to report if regset is active, or %NULL.
  * @writeback:		Function to write data back to user memory, or %NULL.
+ * @get_size:		Function to return the regset's size, or %NULL.
  *
  * This data structure describes a machine resource we call a register set.
  * This is part of the state of an individual thread, not necessarily
  * actual CPU registers per se.  A register set consists of a number of
  * similar slots, given by @n.  Each slot is @size bytes, and aligned to
- * @align bytes (which is at least @size).
+ * @align bytes (which is at least @size).  For dynamically-sized
+ * regsets, @n must contain the maximum possible number of slots for the
+ * regset, and @get_size must point to a function that returns the
+ * current regset size.
  *
- * These functions must be called only on the current thread or on a
- * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are
- * guaranteed will not be woken up and return to user mode, and that we
- * have called wait_task_inactive() on.  (The target thread always might
- * wake up for SIGKILL while these functions are working, in which case
- * that thread's user_regset state might be scrambled.)
+ * Callers that need to know only the current size of the regset and do
+ * not care about its internal structure should call regset_size()
+ * instead of inspecting @n or calling @get_size.
+ *
+ * For backward compatibility, the @get and @set methods must pad to, or
+ * accept, @n * @size bytes, even if the current regset size is smaller.
+ * The precise semantics of these operations depend on the regset being
+ * accessed.
+ *
+ * The functions to which &struct user_regset members point must be
+ * called only on the current thread or on a thread that is in
+ * %TASK_STOPPED or %TASK_TRACED state, that we are guaranteed will not
+ * be woken up and return to user mode, and that we have called
+ * wait_task_inactive() on.  (The target thread always might wake up for
+ * SIGKILL while these functions are working, in which case that
+ * thread's user_regset state might be scrambled.)
  *
  * The @pos argument must be aligned according to @align; the @count
  * argument must be a multiple of @size.  These functions are not
@@ -156,6 +192,7 @@ struct user_regset {
 	user_regset_set_fn		*set;
 	user_regset_active_fn		*active;
 	user_regset_writeback_fn	*writeback;
+	user_regset_get_size_fn		*get_size;
 	unsigned int			n;
 	unsigned int 			size;
 	unsigned int 			align;
@@ -371,5 +408,21 @@ static inline int copy_regset_from_user(struct task_struct *target,
 	return regset->set(target, regset, offset, size, NULL, data);
 }
 
+/**
+ * regset_size - determine the current size of a regset
+ * @target:	thread to be examined
+ * @regset:	regset to be examined
+ *
+ * Return: the result of the regset's get_size method if it has one, else
+ * n * size.  Valid only until the next time (if any) @regset is modified
+ * for @target.
+ */
+static inline unsigned int regset_size(struct task_struct *target,
+				       const struct user_regset *regset)
+{
+	if (regset->get_size)
+		return regset->get_size(target, regset);
+	return regset->n * regset->size;
+}
 
 #endif	/* <linux/regset.h> */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e09a4f963dc0..bb6836986200 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -418,6 +418,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
+#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
 #define NT_METAG_TLS	0x502		/* Metag TLS pointer */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 362493a2f950..b9a4953018ed 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -942,6 +942,7 @@ enum perf_callchain_context {
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
 #define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION		0x08	/* sample collided with another */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index b640071421f7..af5f8c2df87a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -198,4 +198,13 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL			50	/* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL			51	/* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK		0xffff
+# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index f3e37971c842..141aa2ca8728 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -411,6 +411,7 @@ err:
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(perf_aux_output_begin);
 
 static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
 {
@@ -480,6 +481,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 	rb_free_aux(rb);
 	ring_buffer_put(rb);
 }
+EXPORT_SYMBOL_GPL(perf_aux_output_end);
 
 /*
  * Skip over a given number of bytes in the AUX buffer, due to, for example,
@@ -505,6 +507,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_aux_output_skip);
 
 void *perf_get_aux(struct perf_output_handle *handle)
 {
@@ -514,6 +517,7 @@ void *perf_get_aux(struct perf_output_handle *handle)
 
 	return handle->rb->aux_priv;
 }
+EXPORT_SYMBOL_GPL(perf_get_aux);
 
 #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
 
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index f2edcf85780d..49b54e9979cc 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -862,6 +862,7 @@ int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition);
 
 void kstat_incr_irq_this_cpu(unsigned int irq)
 {
diff --git a/kernel/sys.c b/kernel/sys.c
index 524a4cb9bbe2..83ffd7dccf23 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -111,6 +111,12 @@
 #ifndef SET_FP_MODE
 # define SET_FP_MODE(a,b)	(-EINVAL)
 #endif
+#ifndef SVE_SET_VL
+# define SVE_SET_VL(a)		(-EINVAL)
+#endif
+#ifndef SVE_GET_VL
+# define SVE_GET_VL()		(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2386,6 +2392,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_FP_MODE:
 		error = GET_FP_MODE(me);
 		break;
+	case PR_SVE_SET_VL:
+		error = SVE_SET_VL(arg2);
+		break;
+	case PR_SVE_GET_VL:
+		error = SVE_GET_VL();
+		break;
 	default:
 		error = -EINVAL;
 		break;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 95cba0799828..4cf9b91e6c9b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -652,6 +652,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		preempt_disable();
 
+		/* Flush FP/SIMD state that can't survive guest entry/exit */
+		kvm_fpsimd_flush_cpu_state();
+
 		kvm_pmu_flush_hwstate(vcpu);
 
 		kvm_timer_flush_hwstate(vcpu);