aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm
diff options
context:
space:
mode:
authorLinus Torvalds2022-08-04 14:59:54 -0700
committerLinus Torvalds2022-08-04 14:59:54 -0700
commit7c5c3a6177fa9646884114fc7f2e970b0bc50dc9 (patch)
tree956857522574ae7cb07d2227dc16e53d7e9e00e7 /arch/arm64/include/asm
parentf0a892f599c46af673e47418c47c15e69a7b67f4 (diff)
parent281106f938d3daaea6f8b6723a8217a2a1ef6936 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "Quite a large pull request due to a selftest API overhaul and some patches that had come in too late for 5.19. ARM: - Unwinder implementations for both nVHE modes (classic and protected), complete with an overflow stack - Rework of the sysreg access from userspace, with a complete rewrite of the vgic-v3 view to allign with the rest of the infrastructure - Disagregation of the vcpu flags in separate sets to better track their use model. - A fix for the GICv2-on-v3 selftest - A small set of cosmetic fixes RISC-V: - Track ISA extensions used by Guest using bitmap - Added system instruction emulation framework - Added CSR emulation framework - Added gfp_custom flag in struct kvm_mmu_memory_cache - Added G-stage ioremap() and iounmap() functions - Added support for Svpbmt inside Guest s390: - add an interface to provide a hypervisor dump for secure guests - improve selftests to use TAP interface - enable interpretive execution of zPCI instructions (for PCI passthrough) - First part of deferred teardown - CPU Topology - PV attestation - Minor fixes x86: - Permit guests to ignore single-bit ECC errors - Intel IPI virtualization - Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS - PEBS virtualization - Simplify PMU emulation by just using PERF_TYPE_RAW events - More accurate event reinjection on SVM (avoid retrying instructions) - Allow getting/setting the state of the speaker port data bit - Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent - "Notify" VM exit (detect microarchitectural hangs) for Intel - Use try_cmpxchg64 instead of cmpxchg64 - Ignore benign host accesses to PMU MSRs when PMU is disabled - Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior - Allow NX huge page mitigation to be disabled on a per-vm basis - Port eager page splitting to shadow MMU as well - Enable CMCI capability by default and handle injected UCNA errors - Expose pid of vcpu threads in debugfs - x2AVIC support for AMD - cleanup PIO emulation - Fixes for LLDT/LTR emulation - Don't require refcounted "struct page" to create huge SPTEs - Miscellaneous cleanups: - MCE MSR emulation - Use separate namespaces for guest PTEs and shadow PTEs bitmasks - PIO emulation - Reorganize rmap API, mostly around rmap destruction - Do not workaround very old KVM bugs for L0 that runs with nesting enabled - new selftests API for CPUID Generic: - Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache - new selftests API using struct kvm_vcpu instead of a (vm, id) tuple" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits) selftests: kvm: set rax before vmcall selftests: KVM: Add exponent check for boolean stats selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test selftests: KVM: Check stat name before other fields KVM: x86/mmu: remove unused variable RISC-V: KVM: Add support for Svpbmt inside Guest/VM RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap() RISC-V: KVM: Add G-stage ioremap() and iounmap() functions KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache RISC-V: KVM: Add extensible CSR emulation framework RISC-V: KVM: Add extensible system instruction emulation framework RISC-V: KVM: Factor-out instruction emulation into separate sources RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function RISC-V: KVM: Fix variable spelling mistake RISC-V: KVM: Improve ISA extension by using a bitmap KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs() KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT KVM: x86: Do not block APIC write for non ICR registers ...
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--arch/arm64/include/asm/kvm_asm.h16
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h11
-rw-r--r--arch/arm64/include/asm/kvm_host.h205
-rw-r--r--arch/arm64/include/asm/memory.h8
-rw-r--r--arch/arm64/include/asm/stacktrace.h62
-rw-r--r--arch/arm64/include/asm/stacktrace/common.h199
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h55
7 files changed, 438 insertions, 118 deletions
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2e277f2ed671..53035763e48e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
unsigned long vtcr;
};
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base: hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp: hyp FP where the backtrace begins.
+ * @pc: hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+ unsigned long stack_base;
+ unsigned long overflow_stack_base;
+ unsigned long fp;
+ unsigned long pc;
+};
+
/* Translate a kernel address @ptr into its equivalent linear mapping */
#define kvm_ksym_ref(ptr) \
({ \
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0e66edd3aff2..9bdba47f7e14 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
- vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+ WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+ vcpu_set_flag(vcpu, INCREMENT_PC);
}
+#define kvm_pend_exception(v, e) \
+ do { \
+ WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
+ vcpu_set_flag((v), PENDING_EXCEPTION); \
+ vcpu_set_flag((v), e); \
+ } while (0)
+
+
static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
{
return test_bit(feature, vcpu->arch.features);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index de32152cea04..f38ef299f13b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -325,8 +325,30 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Miscellaneous vcpu state flags */
- u64 flags;
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_state;
+
+ /* Configuration flags, set once and for all before the vcpu can run */
+ u8 cflags;
+
+ /* Input flags to the hypervisor code, potentially cleared after use */
+ u8 iflags;
+
+ /* State flags for kernel bookkeeping, unused by the hypervisor code */
+ u8 sflags;
+
+ /*
+ * Don't run the guest (internal implementation need).
+ *
+ * Contrary to the flags above, this is set/cleared outside of
+ * a vcpu context, and thus cannot be mixed with the flags
+ * themselves (or the flag accesses need to be made atomic).
+ */
+ bool pause;
/*
* We maintain more than a single set of debug registers to support
@@ -376,9 +398,6 @@ struct kvm_vcpu_arch {
/* vcpu power state */
struct kvm_mp_state mp_state;
- /* Don't run the guest (internal implementation need) */
- bool pause;
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -392,10 +411,6 @@ struct kvm_vcpu_arch {
/* Additional reset state */
struct vcpu_reset_state reset_state;
- /* True when deferrable sysregs are loaded on the physical CPU,
- * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
- bool sysregs_loaded_on_cpu;
-
/* Guest PV state */
struct {
u64 last_steal;
@@ -403,6 +418,124 @@ struct kvm_vcpu_arch {
} steal;
};
+/*
+ * Each 'flag' is composed of a comma-separated triplet:
+ *
+ * - the flag-set it belongs to in the vcpu->arch structure
+ * - the value for that flag
+ * - the mask for that flag
+ *
+ * __vcpu_single_flag() builds such a triplet for a single-bit flag.
+ * unpack_vcpu_flag() extract the flag value from the triplet for
+ * direct use outside of the flag accessors.
+ */
+#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
+
+#define __unpack_flag(_set, _f, _m) _f
+#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
+
+#define __build_check_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *_fset; \
+ \
+ /* Check that the flags fit in the mask */ \
+ BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
+ /* Check that the flags fit in the type */ \
+ BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
+ } while (0)
+
+#define __vcpu_get_flag(v, flagset, f, m) \
+ ({ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ v->arch.flagset & (m); \
+ })
+
+#define __vcpu_set_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ if (HWEIGHT(m) > 1) \
+ *fset &= ~(m); \
+ *fset |= (f); \
+ } while (0)
+
+#define __vcpu_clear_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ *fset &= ~(m); \
+ } while (0)
+
+#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
+#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
+#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+
+/* SVE exposed to guest */
+#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
+/* SVE config completed */
+#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
+/* PTRAUTH exposed to guest */
+#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
+
+/* Exception pending */
+#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
+/*
+ * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
+ * be set together with an exception...
+ */
+#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
+/* Target EL/MODE (not a single flag, but let's abuse the macro) */
+#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
+
+/* Helpers to encode exceptions with minimum fuss */
+#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
+#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
+#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
+
+/*
+ * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
+ * values:
+ *
+ * For AArch32 EL1:
+ */
+#define EXCEPT_AA32_UND __vcpu_except_flags(0)
+#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
+#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
+/* For AArch64: */
+#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
+#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
+#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
+#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
+/* For AArch64 with NV (one day): */
+#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
+#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
+#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
+#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
+/* Guest debug is live */
+#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
+/* Save SPE context if active */
+#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
+/* Save TRBE context if active */
+#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
+
+/* SVE enabled for host EL0 */
+#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
+/* SME enabled for EL0 */
+#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+/* Physical CPU not in supported_cpus */
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+/* WFIT instruction trapped */
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
+/* vcpu system registers loaded on physical CPU */
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
+
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
sve_ffr_offset((vcpu)->arch.sve_max_vl))
@@ -423,70 +556,31 @@ struct kvm_vcpu_arch {
__size_ret; \
})
-/* vcpu_arch flags field values: */
-#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
-#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
-#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
-#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
-#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
-#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
-#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
-/*
- * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
- * set together with an exception...
- */
-#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
-#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
-/*
- * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
- * take the following values:
- *
- * For AArch32 EL1:
- */
-#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
-#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
-#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
-/* For AArch64: */
-#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
-#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
-#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
-#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
-
-#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
-#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
-#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
-#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
-#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
-#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
-
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+ vcpu_get_flag(vcpu, GUEST_HAS_SVE))
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
- (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
+ vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
#define vcpu_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_set_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_clear_on_unsupported_cpu(vcpu) \
- ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
+ vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
@@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
@@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
-#define kvm_arm_vcpu_sve_finalized(vcpu) \
- ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 227d256cd4b9..4e4c171f070b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -114,6 +114,14 @@
#define OVERFLOW_STACK_SIZE SZ_4K
/*
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwinded stacktrace; plus an additional entry to delimit the
+ * end.
+ */
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+
+/*
* Alignment of kernel segments (e.g. .text, .data).
*
* 4 KB granule: 16 level 3 entries, with contiguous bit
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index aec9315bf156..6ebdcdff77f5 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -8,52 +8,20 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/types.h>
#include <linux/llist.h>
#include <asm/memory.h>
+#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
-enum stack_type {
- STACK_TYPE_UNKNOWN,
- STACK_TYPE_TASK,
- STACK_TYPE_IRQ,
- STACK_TYPE_OVERFLOW,
- STACK_TYPE_SDEI_NORMAL,
- STACK_TYPE_SDEI_CRITICAL,
- __NR_STACK_TYPES
-};
-
-struct stack_info {
- unsigned long low;
- unsigned long high;
- enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-static inline bool on_stack(unsigned long sp, unsigned long size,
- unsigned long low, unsigned long high,
- enum stack_type type, struct stack_info *info)
-{
- if (!low)
- return false;
-
- if (sp < low || sp + size < sp || sp + size > high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = type;
- }
- return true;
-}
-
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
-
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
- unsigned long sp, unsigned long size,
- struct stack_info *info)
-{
- if (info)
- info->type = STACK_TYPE_UNKNOWN;
-
- if (on_task_stack(tsk, sp, size, info))
- return true;
- if (tsk != current || preemptible())
- return false;
- if (on_irq_stack(sp, size, info))
- return true;
- if (on_overflow_stack(sp, size, info))
- return true;
- if (on_sdei_stack(sp, size, info))
- return true;
-
- return false;
-}
-
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
new file mode 100644
index 000000000000..f58eb944c46f
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common arm64 stack unwinder code.
+ *
+ * To implement a new arm64 stack unwinder:
+ * 1) Include this header
+ *
+ * 2) Call into unwind_next_common() from your top level unwind
+ * function, passing it the validation and translation callbacks
+ * (though the later can be NULL if no translation is required).
+ *
+ * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_STACKTRACE_COMMON_H
+#define __ASM_STACKTRACE_COMMON_H
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/kprobes.h>
+#include <linux/types.h>
+
+enum stack_type {
+ STACK_TYPE_UNKNOWN,
+ STACK_TYPE_TASK,
+ STACK_TYPE_IRQ,
+ STACK_TYPE_OVERFLOW,
+ STACK_TYPE_SDEI_NORMAL,
+ STACK_TYPE_SDEI_CRITICAL,
+ STACK_TYPE_HYP,
+ __NR_STACK_TYPES
+};
+
+struct stack_info {
+ unsigned long low;
+ unsigned long high;
+ enum stack_type type;
+};
+
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The lr value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, for which it is no
+ * longer valid to unwind to.
+ *
+ * @prev_fp: The fp that pointed to this frame record, or a synthetic value
+ * of 0. This is used to ensure that within a stack, each
+ * subsequent frame record is at an increasing address.
+ * @prev_type: The type of stack this frame record was on, or a synthetic
+ * value of STACK_TYPE_UNKNOWN. This is used to detect a
+ * transition from one stack to another.
+ *
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
+ *
+ * @task: The task being unwound.
+ */
+struct unwind_state {
+ unsigned long fp;
+ unsigned long pc;
+ DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+ unsigned long prev_fp;
+ enum stack_type prev_type;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+ struct task_struct *task;
+};
+
+static inline bool on_stack(unsigned long sp, unsigned long size,
+ unsigned long low, unsigned long high,
+ enum stack_type type, struct stack_info *info)
+{
+ if (!low)
+ return false;
+
+ if (sp < low || sp + size < sp || sp + size > high)
+ return false;
+
+ if (info) {
+ info->low = low;
+ info->high = high;
+ info->type = type;
+ }
+ return true;
+}
+
+static inline void unwind_init_common(struct unwind_state *state,
+ struct task_struct *task)
+{
+ state->task = task;
+#ifdef CONFIG_KRETPROBES
+ state->kr_cur = NULL;
+#endif
+
+ /*
+ * Prime the first unwind.
+ *
+ * In unwind_next() we'll check that the FP points to a valid stack,
+ * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+ * treated as a transition to whichever stack that happens to be. The
+ * prev_fp value won't be used, but we set it to 0 such that it is
+ * definitely not an accessible stack address.
+ */
+ bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
+ state->prev_fp = 0;
+ state->prev_type = STACK_TYPE_UNKNOWN;
+}
+
+/*
+ * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
+ * a kernel address.
+ *
+ * @fp: the frame pointer to be updated to its kernel address.
+ * @type: the stack type associated with frame pointer @fp
+ *
+ * Returns true and success and @fp is updated to the corresponding
+ * kernel virtual address; otherwise returns false.
+ */
+typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
+ enum stack_type type);
+
+/*
+ * on_accessible_stack_fn() - Check whether a stack range is on any
+ * of the possible stacks.
+ *
+ * @tsk: task whose stack is being unwound
+ * @sp: stack address being checked
+ * @size: size of the stack range being checked
+ * @info: stack unwinding context
+ */
+typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
+ unsigned long sp, unsigned long size,
+ struct stack_info *info);
+
+static inline int unwind_next_common(struct unwind_state *state,
+ struct stack_info *info,
+ on_accessible_stack_fn accessible,
+ stack_trace_translate_fp_fn translate_fp)
+{
+ unsigned long fp = state->fp, kern_fp = fp;
+ struct task_struct *tsk = state->task;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ if (!accessible(tsk, fp, 16, info))
+ return -EINVAL;
+
+ if (test_bit(info->type, state->stacks_done))
+ return -EINVAL;
+
+ /*
+ * If fp is not from the current address space perform the necessary
+ * translation before dereferencing it to get the next fp.
+ */
+ if (translate_fp && !translate_fp(&kern_fp, info->type))
+ return -EINVAL;
+
+ /*
+ * As stacks grow downward, any valid record on the same stack must be
+ * at a strictly higher address than the prior record.
+ *
+ * Stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
+ *
+ * ... but the nesting itself is strict. Once we transition from one
+ * stack to another, it's never valid to unwind back to that first
+ * stack.
+ */
+ if (info->type == state->prev_type) {
+ if (fp <= state->prev_fp)
+ return -EINVAL;
+ } else {
+ __set_bit(state->prev_type, state->stacks_done);
+ }
+
+ /*
+ * Record this frame record's values and location. The prev_fp and
+ * prev_type are only meaningful to the next unwind_next() invocation.
+ */
+ state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
+ state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
+ state->prev_fp = fp;
+ state->prev_type = info->type;
+
+ return 0;
+}
+
+#endif /* __ASM_STACKTRACE_COMMON_H */
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
new file mode 100644
index 000000000000..d5527b600390
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ * 1) Non-protected nVHE mode - the host can directly access the
+ * HYP stack pages and unwind the HYP stack in EL1. This saves having
+ * to allocate shared buffers for the host to read the unwinded
+ * stacktrace.
+ *
+ * 2) pKVM (protected nVHE) mode - the host cannot directly access
+ * the HYP memory. The stack is unwinded in EL2 and dumped to a shared
+ * buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#ifndef __ASM_STACKTRACE_NVHE_H
+#define __ASM_STACKTRACE_NVHE_H
+
+#include <asm/stacktrace/common.h>
+
+/*
+ * kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
+ *
+ * @state : unwind_state to initialize
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ */
+static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
+ unsigned long fp,
+ unsigned long pc)
+{
+ unwind_init_common(state, NULL);
+
+ state->fp = fp;
+ state->pc = pc;
+}
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * Conventional (non-protected) nVHE HYP stack unwinder
+ *
+ * In non-protected mode, the unwinding is done from kernel proper context
+ * (by the host in EL1).
+ */
+
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
+
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+#endif /* __ASM_STACKTRACE_NVHE_H */