diff options
author | Linus Torvalds | 2023-05-01 12:06:20 -0700 |
---|---|---|
committer | Linus Torvalds | 2023-05-01 12:06:20 -0700 |
commit | c8c655c34e33544aec9d64b660872ab33c29b5f1 (patch) | |
tree | 4aad88f698f04cef9e5d9d573a6df6283085dadd /arch/arm64 | |
parent | d75439d64a1e2b35e0f08906205b00279753cbed (diff) | |
parent | b3c98052d46948a8d65d2778c7f306ff38366aac (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"s390:
- More phys_to_virt conversions
- Improvement of AP management for VSIE (nested virtualization)
ARM64:
- Numerous fixes for the pathological lock inversion issue that
plagued KVM/arm64 since... forever.
- New framework allowing SMCCC-compliant hypercalls to be forwarded
to userspace, hopefully paving the way for some more features being
moved to VMMs rather than be implemented in the kernel.
- Large rework of the timer code to allow a VM-wide offset to be
applied to both virtual and physical counters as well as a
per-timer, per-vcpu offset that complements the global one. This
last part allows the NV timer code to be implemented on top.
- A small set of fixes to make sure that we don't change anything
affecting the EL1&0 translation regime just after having having
taken an exception to EL2 until we have executed a DSB. This
ensures that speculative walks started in EL1&0 have completed.
- The usual selftest fixes and improvements.
x86:
- Optimize CR0.WP toggling by avoiding an MMU reload when TDP is
enabled, and by giving the guest control of CR0.WP when EPT is
enabled on VMX (VMX-only because SVM doesn't support per-bit
controls)
- Add CR0/CR4 helpers to query single bits, and clean up related code
where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long"
return as a bool
- Move AMD_PSFD to cpufeatures.h and purge KVM's definition
- Avoid unnecessary writes+flushes when the guest is only adding new
PTEs
- Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s
optimizations when emulating invalidations
- Clean up the range-based flushing APIs
- Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a
single A/D bit using a LOCK AND instead of XCHG, and skip all of
the "handle changed SPTE" overhead associated with writing the
entire entry
- Track the number of "tail" entries in a pte_list_desc to avoid
having to walk (potentially) all descriptors during insertion and
deletion, which gets quite expensive if the guest is spamming
fork()
- Disallow virtualizing legacy LBRs if architectural LBRs are
available, the two are mutually exclusive in hardware
- Disallow writes to immutable feature MSRs (notably
PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features
- Overhaul the vmx_pmu_caps selftest to better validate
PERF_CAPABILITIES
- Apply PMU filters to emulated events and add test coverage to the
pmu_event_filter selftest
- AMD SVM:
- Add support for virtual NMIs
- Fixes for edge cases related to virtual interrupts
- Intel AMX:
- Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if
XTILE_DATA is not being reported due to userspace not opting in
via prctl()
- Fix a bug in emulation of ENCLS in compatibility mode
- Allow emulation of NOP and PAUSE for L2
- AMX selftests improvements
- Misc cleanups
MIPS:
- Constify MIPS's internal callbacks (a leftover from the hardware
enabling rework that landed in 6.3)
Generic:
- Drop unnecessary casts from "void *" throughout kvm_main.c
- Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the
struct size by 8 bytes on 64-bit kernels by utilizing a padding
hole
Documentation:
- Fix goof introduced by the conversion to rST"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits)
KVM: s390: pci: fix virtual-physical confusion on module unload/load
KVM: s390: vsie: clarifications on setting the APCB
KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA
KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state
KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init()
KVM: selftests: Test the PMU event "Instructions retired"
KVM: selftests: Copy full counter values from guest in PMU event filter test
KVM: selftests: Use error codes to signal errors in PMU event filter test
KVM: selftests: Print detailed info in PMU event filter asserts
KVM: selftests: Add helpers for PMC asserts in PMU event filter test
KVM: selftests: Add a common helper for the PMU event filter guest code
KVM: selftests: Fix spelling mistake "perrmited" -> "permitted"
KVM: arm64: vhe: Drop extra isb() on guest exit
KVM: arm64: vhe: Synchronise with page table walker on MMU update
KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc()
KVM: arm64: nvhe: Synchronise with page table walker on TLBI
KVM: arm64: Handle 32bit CNTPCTSS traps
KVM: arm64: nvhe: Synchronise with page table walker on vcpu run
KVM: arm64: vgic: Don't acquire its_lock before config_lock
KVM: selftests: Add test to verify KVM's supported XCR0
...
Diffstat (limited to 'arch/arm64')
34 files changed, 1168 insertions, 351 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3dd691c85ca0..7e7e19ef6993 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/jump_label.h> #include <linux/kvm_types.h> +#include <linux/maple_tree.h> #include <linux/percpu.h> #include <linux/psci.h> #include <asm/arch_gicv3.h> @@ -199,6 +200,9 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + /* Protects VM-scoped configuration data */ + struct mutex config_lock; + /* * If we encounter a data abort without valid instruction syndrome * information, report this to user space. User space can (and @@ -221,7 +225,12 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 - + /* VM counter offset */ +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 + /* SMCCC filter initialized for the VM */ +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8 unsigned long flags; /* @@ -242,6 +251,7 @@ struct kvm_arch { /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + struct maple_tree smccc_filter; /* * For an untrusted host VM, 'pkvm.handle' is used to lookup @@ -365,6 +375,10 @@ enum vcpu_sysreg { TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ + CNTHP_CTL_EL2, + CNTHP_CVAL_EL2, + CNTHV_CTL_EL2, + CNTHV_CVAL_EL2, NR_SYS_REGS /* Nothing after this line! */ }; @@ -522,6 +536,7 @@ struct kvm_vcpu_arch { /* vcpu power state */ struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -939,6 +954,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); @@ -1022,8 +1040,10 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, - struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); @@ -1078,6 +1098,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_32bit_el0() && \ !static_branch_unlikely(&arm64_mismatched_32bit_el0)) +#define kvm_vm_has_ran_once(kvm) \ + (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 083cc47dca08..27e63c111f78 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -63,6 +63,7 @@ * specific registers encoded in the instructions). */ .macro kern_hyp_va reg +#ifndef __KVM_VHE_HYPERVISOR__ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask and \reg, \reg, #1 /* mask with va_mask */ ror \reg, \reg, #1 /* rotate to the first tag bit */ @@ -70,6 +71,7 @@ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ ror \reg, \reg, #63 /* rotate back */ alternative_cb_end +#endif .endm /* @@ -127,6 +129,7 @@ void kvm_apply_hyp_relocations(void); static __always_inline unsigned long __kern_hyp_va(unsigned long v) { +#ifndef __KVM_VHE_HYPERVISOR__ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" "ror %0, %0, #1\n" "add %0, %0, #0\n" @@ -135,6 +138,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); +#endif return v; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c48b41c9b0cc..e72d9aaab6b1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -388,6 +388,7 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -400,7 +401,9 @@ #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) +#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..f7ddd73a8c0f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 @@ -372,6 +381,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -411,6 +424,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 @@ -469,6 +484,27 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2f9911256503..bf38d065396b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2230,6 +2230,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, IMP) }, + { + .desc = "Enhanced Counter Virtualization (CNTPOFF)", + .capability = ARM64_HAS_ECV_CNTPOFF, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, + .field_width = 4, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e1af4301b913..05b022be885b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,7 @@ #include <asm/arch_timer.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_nested.h> #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> @@ -30,14 +31,11 @@ static u32 host_ptimer_irq_flags; static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, +static const u8 default_ppi[] = { + [TIMER_PTIMER] = 30, + [TIMER_VTIMER] = 27, + [TIMER_HPTIMER] = 26, + [TIMER_HVTIMER] = 28, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -51,6 +49,24 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool kvm_arch_timer_get_input_level(int vintid); + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + +static bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + +static int nr_timers(struct kvm_vcpu *vcpu) +{ + if (!vcpu_has_nv(vcpu)) + return NR_KVM_EL0_TIMERS; + + return NR_KVM_TIMERS; +} u32 timer_get_ctl(struct arch_timer_context *ctxt) { @@ -61,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2); default: WARN_ON(1); return 0; @@ -76,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); default: WARN_ON(1); return 0; @@ -84,10 +108,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { + u64 offset = 0; + + if (!ctxt) + return 0; + if (ctxt->offset.vm_offset) - return *ctxt->offset.vm_offset; + offset += *ctxt->offset.vm_offset; + if (ctxt->offset.vcpu_offset) + offset += *ctxt->offset.vcpu_offset; - return 0; + return offset; } static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) @@ -101,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + break; default: WARN_ON(1); } @@ -117,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + break; default: WARN_ON(1); } @@ -139,13 +182,27 @@ u64 kvm_phys_timer_read(void) static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { - if (has_vhe()) { + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + map->direct_vtimer = vcpu_hvtimer(vcpu); + map->direct_ptimer = vcpu_hptimer(vcpu); + map->emul_vtimer = vcpu_vtimer(vcpu); + map->emul_ptimer = vcpu_ptimer(vcpu); + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = vcpu_hvtimer(vcpu); + map->emul_ptimer = vcpu_hptimer(vcpu); + } + } else if (has_vhe()) { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = NULL; map->emul_ptimer = NULL; } else { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = NULL; + map->emul_vtimer = NULL; map->emul_ptimer = vcpu_ptimer(vcpu); } @@ -212,7 +269,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx, ns = cyclecounter_cyc2ns(timecounter->cc, val - now, timecounter->mask, - &timecounter->frac); + &timer_ctx->ns_frac); return ns; } @@ -240,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) { - struct arch_timer_context *ctx = vcpu_vtimer(vcpu); u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + struct arch_timer_context *ctx; + + ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu) + : vcpu_vtimer(vcpu); return kvm_counter_compute_delta(ctx, val); } @@ -255,7 +315,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) u64 min_delta = ULLONG_MAX; int i; - for (i = 0; i < NR_KVM_TIMERS; i++) { + for (i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; WARN(ctx->loaded, "timer %d loaded\n", i); @@ -338,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) switch (index) { case TIMER_VTIMER: + case TIMER_HVTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); break; case NR_KVM_TIMERS: @@ -392,12 +454,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, int ret; timer_ctx->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer_ctx->irq.irq, + timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); WARN_ON(ret); @@ -432,6 +494,12 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); } +static void set_cntpoff(u64 cntpoff) +{ + if (has_cntpoff()) + write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2); +} + static void timer_save_state(struct arch_timer_context *ctx) { struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); @@ -447,7 +515,10 @@ static void timer_save_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval; + case TIMER_VTIMER: + case TIMER_HVTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -473,13 +544,20 @@ static void timer_save_state(struct arch_timer_context *ctx) set_cntvoff(0); break; case TIMER_PTIMER: + case TIMER_HPTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); + cval = read_sysreg_el0(SYS_CNTP_CVAL); + + if (!has_cntpoff()) + cval -= timer_get_offset(ctx); + + timer_set_cval(ctx, cval); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); isb(); + set_cntpoff(0); break; case NR_KVM_TIMERS: BUG(); @@ -510,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_vtimer) && !kvm_timer_irq_can_fire(map.emul_ptimer) && !vcpu_has_wfit_active(vcpu)) return; @@ -543,14 +622,23 @@ static void timer_restore_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval, offset; + case TIMER_VTIMER: + case TIMER_HVTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); + case TIMER_HPTIMER: + cval = timer_get_cval(ctx); + offset = timer_get_offset(ctx); + set_cntpoff(offset); + if (!has_cntpoff()) + cval += offset; + write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; @@ -586,7 +674,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) - phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); phys_active |= ctx->irq.level; @@ -621,6 +709,128 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } +/* If _pred is true, set bit in _set, otherwise set it in _clr */ +#define assign_clear_set_bit(_pred, _bit, _clr, _set) \ + do { \ + if (_pred) \ + (_set) |= (_bit); \ + else \ + (_clr) |= (_bit); \ + } while (0) + +static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, + struct timer_map *map) +{ + int hw, ret; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + /* + * We only ever unmap the vtimer irq on a VHE system that runs nested + * virtualization, in which case we have both a valid emul_vtimer, + * emul_ptimer, direct_vtimer, and direct_ptimer. + * + * Since this is called from kvm_timer_vcpu_load(), a change between + * vEL2 and vEL1/0 will have just happened, and the timer_map will + * represent this, and therefore we switch the emul/direct mappings + * below. + */ + hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer)); + if (hw < 0) { + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer)); + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer)); + + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_vtimer->host_timer_irq, + timer_irq(map->direct_vtimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_ptimer->host_timer_irq, + timer_irq(map->direct_ptimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + + /* + * The virtual offset behaviour is "interresting", as it + * always applies when HCR_EL2.E2H==0, but only when + * accessed from EL1 when HCR_EL2.E2H==1. So make sure we + * track E2H when putting the HV timer in "direct" mode. + */ + if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { + struct arch_timer_offset *offs = &map->direct_vtimer->offset; + + if (vcpu_el2_e2h_is_set(vcpu)) + offs->vcpu_offset = NULL; + else + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + } + } +} + +static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + bool tpt, tpc; + u64 clr, set; + + /* + * No trapping gets configured here with nVHE. See + * __timer_enable_traps(), which is where the stuff happens. + */ + if (!has_vhe()) + return; + + /* + * Our default policy is not to trap anything. As we progress + * within this function, reality kicks in and we start adding + * traps based on emulation requirements. + */ + tpt = tpc = false; + + /* + * We have two possibility to deal with a physical offset: + * + * - Either we have CNTPOFF (yay!) or the offset is 0: + * we let the guest freely access the HW + * + * - or neither of these condition apply: + * we trap accesses to the HW, but still use it + * after correcting the physical offset + */ + if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) + tpt = tpc = true; + + /* + * Apply the enable bits that the guest hypervisor has requested for + * its own guest. We can only add traps that wouldn't have been set + * above. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + /* Use the VHE format for mental sanity */ + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; + + tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); + tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); + } + + /* + * Now that we have collected our requirements, compute the + * trap and enable bits. + */ + set = 0; + clr = 0; + + assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); + assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); + + /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */ + sysreg_clear_set(cntkctl_el1, clr, set); +} + void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -632,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); if (static_branch_likely(&has_gic_active_state)) { + if (vcpu_has_nv(vcpu)) + kvm_timer_vcpu_load_nested_switch(vcpu, &map); + kvm_timer_vcpu_load_gic(map.direct_vtimer); if (map.direct_ptimer) kvm_timer_vcpu_load_gic(map.direct_ptimer); @@ -644,9 +857,12 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); - + if (map.emul_vtimer) + timer_emulate(map.emul_vtimer); if (map.emul_ptimer) timer_emulate(map.emul_ptimer); + + timer_set_traps(vcpu, &map); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -689,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -738,56 +956,89 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - timer_set_ctl(vcpu_vtimer(vcpu), 0); - timer_set_ctl(vcpu_ptimer(vcpu), 0); + for (int i = 0; i < nr_timers(vcpu); i++) + timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + + /* + * A vcpu running at EL2 is in charge of the offset applied to + * the virtual timer, so use the physical VM offset, and point + * the vcpu offset to CNTVOFF_EL2. + */ + if (vcpu_has_nv(vcpu)) { + struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; + + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; + } if (timer->enabled) { - kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); - kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + for (int i = 0; i < nr_timers(vcpu); i++) + kvm_timer_update_irq(vcpu, false, + vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { - kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer)); if (map.direct_ptimer) - kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); } } + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); return 0; } +static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) +{ + struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); + struct kvm *kvm = vcpu->kvm; + + ctxt->vcpu = vcpu; + + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + + hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + ctxt->hrtimer.function = kvm_hrtimer_expire; + + switch (timerid) { + case TIMER_PTIMER: + case TIMER_HPTIMER: + ctxt->host_timer_irq = host_ptimer_irq; + break; + case TIMER_VTIMER: + case TIMER_HVTIMER: + ctxt->host_timer_irq = host_vtimer_irq; + break; + } +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - vtimer->vcpu = vcpu; - vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset; - ptimer->vcpu = vcpu; + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_context_init(vcpu, i); - /* Synchronize cntvoff across all vtimers of a VM. */ - timer_set_offset(vtimer, kvm_phys_timer_read()); - timer_set_offset(ptimer, 0); + /* Synchronize offsets across timers of a VM if not already provided */ + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); + timer_set_offset(vcpu_ptimer(vcpu), 0); + } hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; +} - hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - vtimer->hrtimer.function = kvm_hrtimer_expire; - ptimer->hrtimer.function = kvm_hrtimer_expire; - - vtimer->irq.irq = default_vtimer_irq.irq; - ptimer->irq.irq = default_ptimer_irq.irq; - - vtimer->host_timer_irq = host_vtimer_irq; - ptimer->host_timer_irq = host_ptimer_irq; - - vtimer->host_timer_irq_flags = host_vtimer_irq_flags; - ptimer->host_timer_irq_flags = host_ptimer_irq_flags; +void kvm_timer_init_vm(struct kvm *kvm) +{ + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm->arch.timer_data.ppi[i] = default_ppi[i]; } void kvm_timer_cpu_up(void) @@ -814,8 +1065,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; case KVM_REG_ARM_TIMER_CNT: - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_vtimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } break; case KVM_REG_ARM_TIMER_CVAL: timer = vcpu_vtimer(vcpu); @@ -825,6 +1079,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; + case KVM_REG_ARM_PTIMER_CNT: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_ptimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } + break; case KVM_REG_ARM_PTIMER_CVAL: timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); @@ -902,6 +1163,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, val = kvm_phys_timer_read() - timer_get_offset(timer); break; + case TIMER_REG_VOFF: + val = *timer->offset.vcpu_offset; + break; + default: BUG(); } @@ -920,7 +1185,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) + if (timer == map.emul_vtimer || timer == map.emul_ptimer) return kvm_arm_timer_read(vcpu, timer, treg); preempt_disable(); @@ -952,6 +1217,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, timer_set_cval(timer, val); break; + case TIMER_REG_VOFF: + *timer->offset.vcpu_offset = val; + break; + default: BUG(); } @@ -967,7 +1236,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) { + if (timer == map.emul_vtimer || timer == map.emul_ptimer) { soft_timer_cancel(&timer->hrtimer); kvm_arm_timer_write(vcpu, timer, treg, val); timer_emulate(timer); @@ -1047,10 +1316,6 @@ static const struct irq_domain_ops timer_domain_ops = { .free = timer_irq_domain_free, }; -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -1192,44 +1457,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - int vtimer_irq, ptimer_irq, ret; - unsigned long i; + u32 ppis = 0; + bool valid; - vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); - if (ret) - return false; + mutex_lock(&vcpu->kvm->arch.config_lock); - ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); - if (ret) - return false; + for (int i = 0; i < nr_timers(vcpu); i++) { + struct arch_timer_context *ctx; + int irq; + + ctx = vcpu_get_timer(vcpu, i); + irq = timer_irq(ctx); + if (kvm_vgic_set_owner(vcpu, irq, ctx)) + break; - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || - vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) - return false; + /* + * We know by construction that we only have PPIs, so + * all values are less than 32. + */ + ppis |= BIT(irq); } - return true; + valid = hweight32(ppis) == nr_timers(vcpu); + + if (valid) + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); + + mutex_unlock(&vcpu->kvm->arch.config_lock); + + return valid; } -bool kvm_arch_timer_get_input_level(int vintid) +static bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); - struct arch_timer_context *timer; if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == vcpu_vtimer(vcpu)->irq.irq) - timer = vcpu_vtimer(vcpu); - else if (vintid == vcpu_ptimer(vcpu)->irq.irq) - timer = vcpu_ptimer(vcpu); - else - BUG(); + for (int i = 0; i < nr_timers(vcpu); i++) { + struct arch_timer_context *ctx; + + ctx = vcpu_get_timer(vcpu, i); + if (timer_irq(ctx) == vintid) + return kvm_timer_should_fire(ctx); + } + + /* A timer IRQ has fired, but no matching timer was found? */ + WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); - return kvm_timer_should_fire(timer); + return false; } int kvm_timer_enable(struct kvm_vcpu *vcpu) @@ -1258,7 +1535,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, - map.direct_vtimer->irq.irq, + timer_irq(map.direct_vtimer), &arch_timer_irq_ops); if (ret) return ret; @@ -1266,7 +1543,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (map.direct_ptimer) { ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, - map.direct_ptimer->irq.irq, + timer_irq(map.direct_ptimer), &arch_timer_irq_ops); } @@ -1278,45 +1555,17 @@ no_vgic: return 0; } -/* - * On VHE system, we only need to configure the EL2 timer trap register once, - * not for every world switch. - * The host kernel runs at EL2 with HCR_EL2.TGE == 1, - * and this makes those bits have no effect for the host kernel execution. - */ +/* If we have CNTPOFF, permanently set ECV to enable it */ void kvm_timer_init_vhe(void) { - /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ - u32 cnthctl_shift = 10; - u64 val; - - /* - * VHE systems allow the guest direct access to the EL1 physical - * timer/counter. - */ - val = read_sysreg(cnthctl_el2); - val |= (CNTHCTL_EL1PCEN << cnthctl_shift); - val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); - write_sysreg(val, cnthctl_el2); -} - -static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; - vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; - } + if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) + sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int __user *uaddr = (int __user *)(long)attr->addr; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - int irq; + int irq, idx, ret = 0; if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -1327,21 +1576,42 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq))) return -EINVAL; - if (vcpu->arch.timer_cpu.enabled) - return -EBUSY; + mutex_lock(&vcpu->kvm->arch.config_lock); + + if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, + &vcpu->kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + idx = TIMER_VTIMER; break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + idx = TIMER_PTIMER; + break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + idx = TIMER_HVTIMER; + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + idx = TIMER_HPTIMER; break; default: - return -ENXIO; + ret = -ENXIO; + goto out; } - return 0; + /* + * We cannot validate the IRQ unicity before we run, so take it at + * face value. The verdict will be given on first vcpu run, for each + * vcpu. Yes this is late. Blame it on the stupid API. + */ + vcpu->kvm->arch.timer_data.ppi[idx] = irq; + +out: + mutex_unlock(&vcpu->kvm->arch.config_lock); + return ret; } int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) @@ -1357,11 +1627,17 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: timer = vcpu_ptimer(vcpu); break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + timer = vcpu_hvtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + timer = vcpu_hptimer(vcpu); + break; default: return -ENXIO; } - irq = timer->irq.irq; + irq = timer_irq(timer); return put_user(irq, uaddr); } @@ -1370,8 +1646,42 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: return 0; } return -ENXIO; } + +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset) +{ + int ret = 0; + + if (offset->reserved) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (lock_all_vcpus(kvm)) { + set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); + + /* + * If userspace decides to set the offset using this + * API rather than merely restoring the counter + * values, the offset applies to both the virtual and + * physical views. + */ + kvm->arch.timer_data.voffset = offset->counter_offset; + kvm->arch.timer_data.poffset = offset->counter_offset; + + unlock_all_vcpus(kvm); + } else { + ret = -EBUSY; + } + + mutex_unlock(&kvm->lock); + + return ret; +} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6673c7b4f1a8..14391826241c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -126,6 +126,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; + mutex_init(&kvm->arch.config_lock); + +#ifdef CONFIG_LOCKDEP + /* Clue in lockdep that the config_lock must be taken inside kvm->lock */ + mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); + mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->lock); +#endif + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; @@ -146,6 +156,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); + kvm_timer_init_vm(kvm); + /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->max_vcpus = kvm_arm_default_max_vcpus(); @@ -190,6 +202,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); + + kvm_arm_teardown_hypercalls(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -219,6 +233,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: case KVM_CAP_IRQFD_RESAMPLE: + case KVM_CAP_COUNTER_OFFSET: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -325,6 +340,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; + spin_lock_init(&vcpu->arch.mp_state_lock); + +#ifdef CONFIG_LOCKDEP + /* Inform lockdep that the config_lock is acquired after vcpu->mutex */ + mutex_lock(&vcpu->mutex); + mutex_lock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->mutex); +#endif + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); @@ -442,34 +467,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_arm_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; } static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED); kvm_make_request(KVM_REQ_SUSPEND, vcpu); kvm_vcpu_kick(vcpu); } static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - *mp_state = vcpu->arch.mp_state; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -479,12 +511,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.mp_state = *mp_state; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); break; case KVM_MP_STATE_SUSPENDED: kvm_arm_vcpu_suspend(vcpu); @@ -493,6 +527,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } @@ -592,9 +628,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -1209,10 +1245,14 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, /* * Handle the "start in power-off" case. */ + spin_lock(&vcpu->arch.mp_state_lock); + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); + + spin_unlock(&vcpu->arch.mp_state_lock); return 0; } @@ -1438,11 +1478,31 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, } } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_has_attr(kvm, attr); + default: + return -ENXIO; + } +} + +static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_set_attr(kvm, attr); + default: + return -ENXIO; + } +} + +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; switch (ioctl) { case KVM_CREATE_IRQCHIP: { @@ -1478,11 +1538,73 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); } + case KVM_ARM_SET_COUNTER_OFFSET: { + struct kvm_arm_counter_offset offset; + + if (copy_from_user(&offset, argp, sizeof(offset))) + return -EFAULT; + return kvm_vm_ioctl_set_counter_offset(kvm, &offset); + } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + } + case KVM_SET_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_set_attr(kvm, &attr); + } default: return -EINVAL; } } +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); + } +} + +void unlock_all_vcpus(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + unsigned long c; + + lockdep_assert_held(&kvm->lock); + + /* + * Any time a vcpu is in an ioctl (including running), the + * core KVM code tries to grab the vcpu->mutex. + * + * By grabbing the vcpu->mutex of all VCPUs we ensure that no + * other VCPUs can fiddle with the state while we access it. + */ + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + static unsigned long nvhe_percpu_size(void) { return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..20280a5233f6 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -/** - * ARM64 versions of the TIMER registers, always available on arm64 - */ +static const u64 timer_reg_list[] = { + KVM_REG_ARM_TIMER_CTL, + KVM_REG_ARM_TIMER_CNT, + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_PTIMER_CTL, + KVM_REG_ARM_PTIMER_CNT, + KVM_REG_ARM_PTIMER_CVAL, +}; -#define NUM_TIMER_REGS 3 +#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) static bool is_timer_reg(u64 index) { @@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index) case KVM_REG_ARM_TIMER_CTL: case KVM_REG_ARM_TIMER_CNT: case KVM_REG_ARM_TIMER_CVAL: + case KVM_REG_ARM_PTIMER_CTL: + case KVM_REG_ARM_PTIMER_CNT: + case KVM_REG_ARM_PTIMER_CVAL: return true; } return false; @@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index) static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) - return -EFAULT; + for (int i = 0; i < NUM_TIMER_REGS; i++) { + if (put_user(timer_reg_list[i], uindices)) + return -EFAULT; + uindices++; + } return 0; } @@ -957,7 +962,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_ARM_VCPU_PMU_V3_CTRL: + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); @@ -1019,8 +1026,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } -long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, - struct kvm_arm_copy_mte_tags *copy_tags) +int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) { gpa_t guest_ipa = copy_tags->guest_ipa; size_t length = copy_tags->length; @@ -1041,6 +1048,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) return -EINVAL; + /* Lengths above INT_MAX cannot be represented in the return value */ + if (length > INT_MAX) + return -EINVAL; + gfn = gpa_to_gfn(guest_ipa); mutex_lock(&kvm->slots_lock); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a798c0b4d717..6dcd6604b6bc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -36,8 +36,6 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) static int handle_hvc(struct kvm_vcpu *vcpu) { - int ret; - trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; @@ -52,33 +50,29 @@ static int handle_hvc(struct kvm_vcpu *vcpu) return 1; } - ret = kvm_hvc_call_handler(vcpu); - if (ret < 0) { - vcpu_set_reg(vcpu, 0, ~0UL); - return 1; - } - - return ret; + return kvm_smccc_call_handler(vcpu); } static int handle_smc(struct kvm_vcpu *vcpu) { - int ret; - /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a * Trap exception, not a Secure Monitor Call exception [...]" * * We need to advance the PC after the trap, as it would - * otherwise return to the same address... - * - * Only handle SMCs from the virtual EL2 with an immediate of zero and - * skip it otherwise. + * otherwise return to the same address. Furthermore, pre-incrementing + * the PC before potentially exiting to userspace maintains the same + * abstraction for both SMCs and HVCs. + */ + kvm_incr_pc(vcpu); + + /* + * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9 + * "SMC and HVC immediate value". */ - if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) { + if (kvm_vcpu_hvc_get_imm(vcpu)) { vcpu_set_reg(vcpu, 0, ~0UL); - kvm_incr_pc(vcpu); return 1; } @@ -89,13 +83,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than * being treated as UNDEFINED. */ - ret = kvm_hvc_call_handler(vcpu); - if (ret < 0) - vcpu_set_reg(vcpu, 0, ~0UL); - - kvm_incr_pc(vcpu); - - return ret; + return kvm_smccc_call_handler(vcpu); } /* diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 07d37ff88a3f..c41166f1a1dd 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -26,6 +26,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +#include <asm/kvm_nested.h> #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> @@ -326,6 +327,55 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } +static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *ctxt; + u32 sysreg; + u64 val; + + /* + * We only get here for 64bit guests, 32bit guests will hit + * the long and winding road all the way to the standard + * handling. Yes, it sucks to be irrelevant. + */ + sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + + switch (sysreg) { + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + ctxt = vcpu_hptimer(vcpu); + break; + } + + /* Check for guest hypervisor trapping */ + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & CNTHCTL_EL1PCTEN) << 10; + + if (!(val & (CNTHCTL_EL1PCTEN << 10))) + return false; + } + + ctxt = vcpu_ptimer(vcpu); + break; + default: + return false; + } + + val = arch_timer_read_cntpct_el0(); + + if (ctxt->offset.vm_offset) + val -= *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + val -= *kern_hyp_va(ctxt->offset.vcpu_offset); + + vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); + __kvm_skip_instr(vcpu); + return true; +} + static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && @@ -339,6 +389,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) return kvm_hyp_handle_ptrauth(vcpu, exit_code); + if (kvm_hyp_handle_cntpct(vcpu)) + return true; + return false; } diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 2673bde62fad..d756b939f296 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1) /* Now drain all buffered data to memory */ psb_csync(); - dsb(nsh); } static void __debug_restore_spe(u64 pmscr_el1) @@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1) isb(); /* Drain the trace buffer to memory */ tsb_csync(); - dsb(nsh); } static void __debug_restore_trace(u64 trfcr_el1) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 552653fa18be..2e9ec4a2a4a3 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void) params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; + + /* + * The CMO below not only cleans the updated params to the + * PoC, but also provides the DSB that ensures ongoing + * page-table walks that have started before we trapped to EL2 + * have completed. + */ kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c2cb46ca4fb6..71fa16a0dc77 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ __debug_save_host_buffers_nvhe(vcpu); + /* + * We're about to restore some new MMU state. Make sure + * ongoing page-table walks that have started before we + * trapped to EL2 have completed. This also synchronises the + * above disabling of SPE and TRBE. + * + * See DDI0487I.a D8.1.5 "Out-of-context translation regimes", + * rule R_LFHQG and subsequent information statements. + */ + dsb(nsh); + __kvm_adjust_pc(vcpu); /* @@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __timer_disable_traps(vcpu); __hyp_vgic_save_state(vcpu); + /* + * Same thing as before the guest run: we're about to switch + * the MMU context, so let's make sure we don't have any + * ongoing EL1&0 translations. + */ + dsb(nsh); + __deactivate_traps(vcpu); __load_host_stage2(); diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index 9072e71693ba..b185ac0dbd47 100644 --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -9,6 +9,7 @@ #include <linux/kvm_host.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> void __kvm_timer_set_cntvoff(u64 cntvoff) { @@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu) */ void __timer_enable_traps(struct kvm_vcpu *vcpu) { - u64 val; + u64 clr = 0, set = 0; /* * Disallow physical timer access for the guest - * Physical counter access is allowed + * Physical counter access is allowed if no offset is enforced + * or running protected (we don't offset anything in this case). */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); + clr = CNTHCTL_EL1PCEN; + if (is_protected_kvm_enabled() || + !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset) + set |= CNTHCTL_EL1PCTEN; + else + clr |= CNTHCTL_EL1PCTEN; + + sysreg_clear_set(cnthctl_el2, clr, set); } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index d296d617f589..978179133f4b 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -15,8 +15,31 @@ struct tlb_inv_context { }; static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) + struct tlb_inv_context *cxt, + bool nsh) { + /* + * We have two requirements: + * + * - ensure that the page table updates are visible to all + * CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN + * being either ish or nsh, depending on the invalidation + * type. + * + * - complete any speculative page table walk started before + * we trapped to EL2 so that we can mess with the MM + * registers out of context, for which dsb(nsh) is enough + * + * The composition of these two barriers is a dsb(DOMAIN), and + * the 'nsh' parameter tracks the distinction between + * Inner-Shareable and Non-Shareable, as specified by the + * callers. + */ + if (nsh) + dsb(nsh); + else + dsb(ish); + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); @@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); @@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) void __kvm_flush_vm_context(void) { - dsb(ishst); + /* Same remark as in __tlb_switch_to_guest() */ + dsb(ish); __tlbi(alle1is); /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index cd3f3117bf16..3d868e84c7a0 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. + * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() + * to make sure these changes take effect before running the host or + * additional guests. */ - isb(); - return ret; } diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 7b44f6b3b547..b35a178e7e0d 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -13,6 +13,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_nested.h> /* * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and @@ -70,6 +71,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) __sysreg_save_user_state(host_ctxt); /* + * When running a normal EL1 guest, we only load a new vcpu + * after a context switch, which imvolves a DSB, so all + * speculative EL1&0 walks will have already completed. + * If running NV, the vcpu may transition between vEL1 and + * vEL2 without a context switch, so make sure we complete + * those walks before loading a new context. + */ + if (vcpu_has_nv(vcpu)) + dsb(nsh); + + /* * Load guest EL1 and user state * * We must restore the 32-bit state before the sysregs, thanks diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index c4b4678bc4a4..7fb4df0456de 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset; break; case KVM_PTP_PHYS_COUNTER: - cycles = systime_snapshot.cycles; + cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset; break; default: return; @@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } -static bool kvm_hvc_call_default_allowed(u32 func_id) +static bool kvm_smccc_default_allowed(u32 func_id) { switch (func_id) { /* @@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) } } -static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -117,20 +117,161 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, &smccc_feat->vendor_hyp_bmap); default: - return kvm_hvc_call_default_allowed(func_id); + return false; + } +} + +#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, 0) +#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, ARM_SMCCC_FUNC_MASK) + +static void init_smccc_filter(struct kvm *kvm) +{ + int r; + + mt_init(&kvm->arch.smccc_filter); + + /* + * Prevent userspace from handling any SMCCC calls in the architecture + * range, avoiding the risk of misrepresenting Spectre mitigation status + * to the guest. + */ + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + +} + +static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + struct kvm_smccc_filter filter; + u32 start, end; + int r; + + if (copy_from_user(&filter, uaddr, sizeof(filter))) + return -EFAULT; + + if (memcmp(filter.pad, zero_page, sizeof(filter.pad))) + return -EINVAL; + + start = filter.base; + end = start + filter.nr_functions - 1; + + if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS) + return -EINVAL; + + mutex_lock(&kvm->arch.config_lock); + + if (kvm_vm_has_ran_once(kvm)) { + r = -EBUSY; + goto out_unlock; } + + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, + xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + + set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return r; +} + +static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) +{ + unsigned long idx = func_id; + void *val; + + if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + return KVM_SMCCC_FILTER_HANDLE; + + /* + * But where's the error handling, you say? + * + * mt_find() returns NULL if no entry was found, which just so happens + * to match KVM_SMCCC_FILTER_HANDLE. + */ + val = mt_find(&kvm->arch.smccc_filter, &idx, idx); + return xa_to_value(val); } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) +{ + /* + * Intervening actions in the SMCCC filter take precedence over the + * pseudo-firmware register bitmaps. + */ + u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id); + if (action != KVM_SMCCC_FILTER_HANDLE) + return action; + + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || + kvm_smccc_default_allowed(func_id)) + return KVM_SMCCC_FILTER_HANDLE; + + return KVM_SMCCC_FILTER_DENY; +} + +static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) +{ + u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); + struct kvm_run *run = vcpu->run; + u64 flags = 0; + + if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) + flags |= KVM_HYPERCALL_EXIT_SMC; + + if (!kvm_vcpu_trap_il_is32bit(vcpu)) + flags |= KVM_HYPERCALL_EXIT_16BIT; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall = (typeof(run->hypercall)) { + .nr = func_id, + .flags = flags, + }; +} + +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; + u8 action; gpa_t gpa; - if (!kvm_hvc_call_allowed(vcpu, func_id)) + action = kvm_smccc_get_action(vcpu, func_id); + switch (action) { + case KVM_SMCCC_FILTER_HANDLE: + break; + case KVM_SMCCC_FILTER_DENY: + goto out; + case KVM_SMCCC_FILTER_FWD_TO_USER: + kvm_prepare_hypercall_exit(vcpu, func_id); + return 0; + default: + WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; + } switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: @@ -245,6 +386,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + + init_smccc_filter(kvm); +} + +void kvm_arm_teardown_hypercalls(struct kvm *kvm) +{ + mtree_destroy(&kvm->arch.smccc_filter); } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) @@ -377,17 +525,16 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) if (val & ~fw_reg_features) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && - val != *fw_reg_bmap) { + if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) { ret = -EBUSY; goto out; } WRITE_ONCE(*fw_reg_bmap, val); out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -481,3 +628,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; } + +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return 0; + default: + return -ENXIO; + } +} + +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user *)attr->addr; + + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return kvm_smccc_set_filter(kvm, uaddr); + default: + return -ENXIO; + } +} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 5eca0cdd961d..45727d50d18d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -876,13 +876,13 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) struct arm_pmu *arm_pmu; int ret = -ENXIO; - mutex_lock(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); mutex_lock(&arm_pmus_lock); list_for_each_entry(entry, &arm_pmus, entry) { arm_pmu = entry->arm_pmu; if (arm_pmu->pmu.type == pmu_id) { - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) || + if (kvm_vm_has_ran_once(kvm) || (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { ret = -EBUSY; break; @@ -896,7 +896,6 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) } mutex_unlock(&arm_pmus_lock); - mutex_unlock(&kvm->lock); return ret; } @@ -904,22 +903,20 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) return -EBUSY; - mutex_lock(&kvm->lock); if (!kvm->arch.arm_pmu) { /* No PMU set, get the default one */ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); - if (!kvm->arch.arm_pmu) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.arm_pmu) return -ENODEV; - } } - mutex_unlock(&kvm->lock); switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { @@ -963,19 +960,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&kvm->lock); - - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { - mutex_unlock(&kvm->lock); + if (kvm_vm_has_ran_once(kvm)) return -EBUSY; - } if (!kvm->arch.pmu_filter) { kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!kvm->arch.pmu_filter) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.pmu_filter) return -ENOMEM; - } /* * The default depends on the first applied filter. @@ -994,8 +985,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) else bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&kvm->lock); - return 0; } case KVM_ARM_VCPU_PMU_V3_SET_PMU: { diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..1f69b667332b 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; + int ret = PSCI_RET_SUCCESS; unsigned long cpu_id; cpu_id = smccc_get_arg1(source_vcpu); @@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; + + spin_lock(&vcpu->arch.mp_state_lock); if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) - return PSCI_RET_ALREADY_ON; + ret = PSCI_RET_ALREADY_ON; else - return PSCI_RET_INVALID_PARAMS; + ret = PSCI_RET_INVALID_PARAMS; + + goto out_unlock; } reset_state = &vcpu->arch.reset_state; @@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ reset_state->r0 = smccc_get_arg3(source_vcpu); - WRITE_ONCE(reset_state->reset, true); + reset_state->reset = true; kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* @@ -105,10 +110,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ smp_wmb(); - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); - return PSCI_RET_SUCCESS; +out_unlock: + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) @@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&tmp->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&tmp->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; int ret = 1; @@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) kvm_psci_narrow_to_32bit(vcpu); fallthrough; case PSCI_0_2_FN64_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: kvm_psci_narrow_to_32bit(vcpu); @@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; @@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; @@ -435,6 +439,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) int kvm_psci_call(struct kvm_vcpu *vcpu) { u32 psci_fn = smccc_get_function(vcpu); + int version = kvm_psci_version(vcpu); unsigned long val; val = kvm_psci_check_allowed_function(vcpu, psci_fn); @@ -443,7 +448,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) return 1; } - switch (kvm_psci_version(vcpu)) { + switch (version) { case KVM_ARM_PSCI_1_1: return kvm_psci_1_x_call(vcpu, 1); case KVM_ARM_PSCI_1_0: @@ -453,6 +458,8 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) case KVM_ARM_PSCI_0_1: return kvm_psci_0_1_call(vcpu); default: - return -EINVAL; + WARN_ONCE(1, "Unknown PSCI version %d", version); + smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0); + return 1; } } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 49a3257dec46..b5dee8e57e77 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu) is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { /* @@ -262,17 +262,18 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) bool loaded; u32 pstate; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_set_vm_width(vcpu); - if (!ret) { - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); - } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); if (ret) return ret; + spin_lock(&vcpu->arch.mp_state_lock); + reset_state = vcpu->arch.reset_state; + vcpu->arch.reset_state.reset = false; + spin_unlock(&vcpu->arch.mp_state_lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 34688918c811..71b12094d613 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1154,6 +1154,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, tmr = TIMER_PTIMER; treg = TIMER_REG_CVAL; break; + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + case SYS_AARCH32_CNTPCT: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CNT; + break; default: print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); kvm_inject_undefined(vcpu); @@ -2091,6 +2097,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), + { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, @@ -2541,10 +2549,12 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ + { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer }, }; static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index f3e46a976125..6ce5c025218d 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map, __field( unsigned long, vcpu_id ) __field( int, direct_vtimer ) __field( int, direct_ptimer ) + __field( int, emul_vtimer ) __field( int, emul_ptimer ) ), @@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map, __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); __entry->direct_ptimer = (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_vtimer = + (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1; __entry->emul_ptimer = (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; ), - TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d", __entry->vcpu_id, __entry->direct_vtimer, __entry->direct_ptimer, + __entry->emul_vtimer, __entry->emul_ptimer) ); diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 78cde687383c..07aa0437125a 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; if (iter) { iter = ERR_PTR(-EBUSY); @@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) if (end_of_vgic(iter)) iter = NULL; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return iter; } @@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v) if (IS_ERR(v)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; kfree(iter->lpi_array); kfree(iter); kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index cd134db41a57..9d42c7cb2b58 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) unsigned long i; int ret; - if (irqchip_in_kernel(kvm)) - return -EEXIST; - /* * This function is also called by the KVM_CREATE_IRQCHIP handler, * which had no chance yet to check the availability of the GICv2 @@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; + /* Must be held to avoid race with vCPU creation */ + lockdep_assert_held(&kvm->lock); + ret = -EBUSY; if (!lock_all_vcpus(kvm)) return ret; + mutex_lock(&kvm->arch.config_lock); + + if (irqchip_in_kernel(kvm)) { + ret = -EEXIST; + goto out_unlock; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu_has_run_once(vcpu)) goto out_unlock; @@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); return ret; } @@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } return ret; } @@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) * The function is generally called when nr_spis has been explicitly set * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. * vgic_initialized() returns true when this function has succeeded. - * Must be called with kvm->lock held! */ int vgic_init(struct kvm *kvm) { @@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm) int ret = 0, i; unsigned long idx; + lockdep_assert_held(&kvm->arch.config_lock); + if (vgic_initialized(kvm)) return 0; @@ -373,12 +382,13 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; } -/* To be called with kvm->lock held */ static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) @@ -389,9 +399,9 @@ static void __kvm_vgic_destroy(struct kvm *kvm) void kvm_vgic_destroy(struct kvm *kvm) { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); __kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } /** @@ -414,9 +424,9 @@ int vgic_lazy_init(struct kvm *kvm) if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) return -EBUSY; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } return ret; @@ -441,7 +451,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (likely(vgic_ready(kvm))) return 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; @@ -459,7 +469,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) dist->ready = true; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2642e9ce2819..750e51e3779a 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1958,6 +1958,16 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) mutex_init(&its->its_lock); mutex_init(&its->cmd_lock); + /* Yep, even more trickery for lock ordering... */ +#ifdef CONFIG_LOCKDEP + mutex_lock(&dev->kvm->arch.config_lock); + mutex_lock(&its->cmd_lock); + mutex_lock(&its->its_lock); + mutex_unlock(&its->its_lock); + mutex_unlock(&its->cmd_lock); + mutex_unlock(&dev->kvm->arch.config_lock); +#endif + its->vgic_its_base = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&its->device_list); @@ -2045,6 +2055,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { ret = -ENXIO; goto out; @@ -2058,11 +2075,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, goto out; } - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - addr = its->vgic_its_base + offset; len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; @@ -2076,8 +2088,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } else { *reg = region->its_read(dev->kvm, its, addr, len); } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2749,14 +2762,15 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return 0; mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return -EBUSY; } + mutex_lock(&kvm->arch.config_lock); + mutex_lock(&its->its_lock); + switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: vgic_its_reset(kvm, its); @@ -2769,8 +2783,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } - unlock_all_vcpus(kvm); mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->arch.config_lock); + unlock_all_vcpus(kvm); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index edeac2380591..35cfa268fd5d 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -46,7 +46,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev struct vgic_dist *vgic = &kvm->arch.vgic; int r; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -68,7 +68,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev r = -ENODEV; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return r; } @@ -102,7 +102,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (get_user(addr, uaddr)) return -EFAULT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -191,7 +191,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri } out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); if (!r && !write) r = put_user(addr, uaddr); @@ -227,7 +227,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, (val & 31)) return -EINVAL; - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) ret = -EBUSY; @@ -235,16 +235,16 @@ static int vgic_set_common_attr(struct kvm_device *dev, dev->kvm->arch.vgic.nr_spis = val - VGIC_NR_PRIVATE_IRQS; - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return ret; } case KVM_DEV_ARM_VGIC_GRP_CTRL: { switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_init(dev->kvm); - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return r; case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: /* @@ -260,7 +260,10 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_unlock(&dev->kvm->lock); return -EBUSY; } + + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_v3_save_pending_tables(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; @@ -342,44 +345,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, return 0; } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run and fiddle with the vgic state while we - * access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - /** * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state * @@ -411,15 +376,17 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + ret = vgic_init(dev->kvm); if (ret) goto out; - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); @@ -432,8 +399,9 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && !is_write) @@ -569,12 +537,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (unlikely(!vgic_initialized(dev->kvm))) { - ret = -EBUSY; - goto out; + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; } - if (!lock_all_vcpus(dev->kvm)) { + mutex_lock(&dev->kvm->arch.config_lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { ret = -EBUSY; goto out; } @@ -609,8 +579,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && uaccess && !is_write) { diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 91201f743033..472b18ac92a2 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, case GICD_CTLR: { bool was_enabled, is_hwsgi; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); was_enabled = dist->enabled; is_hwsgi = dist->nassgireq; @@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, else if (!was_enabled && dist->enabled) vgic_kick_vcpus(vcpu->kvm); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; } case GICD_TYPER: diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e67b3b2c8044..1939c94e0b24 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -530,13 +530,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 val; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); val = __vgic_mmio_read_active(vcpu, addr, len); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); return val; } @@ -625,13 +625,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, @@ -662,13 +662,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index a413718be92b..3bb003478060 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -232,9 +232,8 @@ int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) * @kvm: Pointer to the VM being initialized * * We may be called each time a vITS is created, or when the - * vgic is initialized. This relies on kvm->lock to be - * held. In both cases, the number of vcpus should now be - * fixed. + * vgic is initialized. In both cases, the number of vcpus + * should now be fixed. */ int vgic_v4_init(struct kvm *kvm) { @@ -243,6 +242,8 @@ int vgic_v4_init(struct kvm *kvm) int nr_vcpus, ret; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ @@ -309,14 +310,14 @@ int vgic_v4_init(struct kvm *kvm) /** * vgic_v4_teardown - Free the GICv4 data structures * @kvm: Pointer to the VM being destroyed - * - * Relies on kvm->lock to be held. */ void vgic_v4_teardown(struct kvm *kvm) { struct its_vm *its_vm = &kvm->arch.vgic.its_vm; int i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!its_vm->vpes) return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..8be4c1ebdec2 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -24,11 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: * kvm->lock (mutex) - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vcpu->mutex (mutex) + * kvm->arch.config_lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower @@ -573,6 +575,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) return 0; } +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 7f7f3c5ed85a..f9923beedd27 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 37b1340e9646..40ba95472594 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -23,6 +23,7 @@ HAS_DCPOP HAS_DIT HAS_E0PD HAS_ECV +HAS_ECV_CNTPOFF HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 77edce16f4f9..c9a0d1fa3209 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2115,6 +2115,10 @@ Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg CNTPOFF_EL2 3 4 14 0 6 +Field 63:0 PhysicalOffset +EndSysreg + Sysreg CPACR_EL12 3 5 1 0 2 Fields CPACR_ELx EndSysreg |