diff options
author | Linus Torvalds | 2022-03-24 11:58:57 -0700 |
---|---|---|
committer | Linus Torvalds | 2022-03-24 11:58:57 -0700 |
commit | 1ebdbeb03efe89f01f15df038a589077df3d21f5 (patch) | |
tree | 06b6b7bb565668d136c060c5104481e48cbf71e2 /tools | |
parent | efee6c79298fd823c569d501d041de85caa102a6 (diff) | |
parent | c9b8fecddb5bb4b67e351bbaeaa648a6f7456912 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM:
- Proper emulation of the OSLock feature of the debug architecture
- Scalability improvements for the MMU lock when dirty logging is on
- New VMID allocator, which will eventually help with SVA in VMs
- Better support for PMUs in heterogeneous systems
- PSCI 1.1 support, enabling support for SYSTEM_RESET2
- Implement CONFIG_DEBUG_LIST at EL2
- Make CONFIG_ARM64_ERRATUM_2077057 default y
- Reduce the overhead of VM exit when no interrupt is pending
- Remove traces of 32bit ARM host support from the documentation
- Updated vgic selftests
- Various cleanups, doc updates and spelling fixes
RISC-V:
- Prevent KVM_COMPAT from being selected
- Optimize __kvm_riscv_switch_to() implementation
- RISC-V SBI v0.3 support
s390:
- memop selftest
- fix SCK locking
- adapter interruptions virtualization for secure guests
- add Claudio Imbrenda as maintainer
- first step to do proper storage key checking
x86:
- Continue switching kvm_x86_ops to static_call(); introduce
static_call_cond() and __static_call_ret0 when applicable.
- Cleanup unused arguments in several functions
- Synthesize AMD 0x80000021 leaf
- Fixes and optimization for Hyper-V sparse-bank hypercalls
- Implement Hyper-V's enlightened MSR bitmap for nested SVM
- Remove MMU auditing
- Eager splitting of page tables (new aka "TDP" MMU only) when dirty
page tracking is enabled
- Cleanup the implementation of the guest PGD cache
- Preparation for the implementation of Intel IPI virtualization
- Fix some segment descriptor checks in the emulator
- Allow AMD AVIC support on systems with physical APIC ID above 255
- Better API to disable virtualization quirks
- Fixes and optimizations for the zapping of page tables:
- Zap roots in two passes, avoiding RCU read-side critical
sections that last too long for very large guests backed by 4
KiB SPTEs.
- Zap invalid and defunct roots asynchronously via
concurrency-managed work queue.
- Allowing yielding when zapping TDP MMU roots in response to the
root's last reference being put.
- Batch more TLB flushes with an RCU trick. Whoever frees the
paging structure now holds RCU as a proxy for all vCPUs running
in the guest, i.e. to prolong the grace period on their behalf.
It then kicks the vCPUs out of guest mode before doing
rcu_read_unlock().
Generic:
- Introduce __vcalloc and use it for very large allocations that need
memcg accounting"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (246 commits)
KVM: use kvcalloc for array allocations
KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2
kvm: x86: Require const tsc for RT
KVM: x86: synthesize CPUID leaf 0x80000021h if useful
KVM: x86: add support for CPUID leaf 0x80000021
KVM: x86: do not use KVM_X86_OP_OPTIONAL_RET0 for get_mt_mask
Revert "KVM: x86/mmu: Zap only TDP MMU leafs in kvm_zap_gfn_range()"
kvm: x86/mmu: Flush TLB before zap_gfn_range releases RCU
KVM: arm64: fix typos in comments
KVM: arm64: Generalise VM features into a set of flags
KVM: s390: selftests: Add error memop tests
KVM: s390: selftests: Add more copy memop tests
KVM: s390: selftests: Add named stages for memop test
KVM: s390: selftests: Add macro as abstraction for MEM_OP
KVM: s390: selftests: Split memop tests
KVM: s390x: fix SCK locking
RISC-V: KVM: Implement SBI HSM suspend call
RISC-V: KVM: Add common kvm_riscv_vcpu_wfi() function
RISC-V: Add SBI HSM suspend related defines
RISC-V: KVM: Implement SBI v0.3 SRST extension
...
Diffstat (limited to 'tools')
29 files changed, 2059 insertions, 243 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 323e251ed37b..490d489d0ee8 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -367,6 +367,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 507ee1f2aa96..bbc6b7c2dc1b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1135,6 +1135,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 +#define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1971,6 +1973,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. 
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 62f9b781545b..d1e8f5237469 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -8,6 +8,7 @@ /s390x/memop /s390x/resets /s390x/sync_regs_test +/s390x/tprot /x86_64/amx_test /x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test @@ -46,6 +47,7 @@ /x86_64/vmx_tsc_adjust_test /x86_64/vmx_nested_tsc_scaling_test /x86_64/xapic_ipi_test +/x86_64/xapic_state_test /x86_64/xen_shinfo_test /x86_64/xen_vmcall_test /x86_64/xss_msr_test @@ -57,6 +59,7 @@ /hardware_disable_test /kvm_create_max_vcpus /kvm_page_table_test +/max_guest_memory_test /memslot_modification_stress_test /memslot_perf_test /rseq_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b970397f725c..21c2dbd21a81 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -51,6 +51,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test @@ -76,6 +77,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test +TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test @@ -91,6 +93,7 @@ TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test +TEST_GEN_PROGS_x86_64 += 
max_guest_memory_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += rseq_test @@ -120,6 +123,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index ea189d83abf7..63b2178210c4 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -23,7 +23,7 @@ #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) -extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; static volatile uint64_t svc_addr; @@ -47,6 +47,14 @@ static void reset_debug_state(void) isb(); } +static void enable_os_lock(void) +{ + write_sysreg(1, oslar_el1); + isb(); + + GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); +} + static void install_wp(uint64_t addr) { uint32_t wcr; @@ -99,6 +107,7 @@ static void guest_code(void) GUEST_SYNC(0); /* Software-breakpoint */ + reset_debug_state(); asm volatile("sw_bp: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); @@ -152,6 +161,51 @@ static void guest_code(void) GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + GUEST_SYNC(6); + + /* OS Lock does not block software-breakpoint */ + reset_debug_state(); + enable_os_lock(); + sw_bp_addr = 0; + asm volatile("sw_bp2: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); + + GUEST_SYNC(7); + + /* OS Lock blocking hardware-breakpoint */ + 
reset_debug_state(); + enable_os_lock(); + install_hw_bp(PC(hw_bp2)); + hw_bp_addr = 0; + asm volatile("hw_bp2: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, 0); + + GUEST_SYNC(8); + + /* OS Lock blocking watchpoint */ + reset_debug_state(); + enable_os_lock(); + write_data = '\0'; + wp_data_addr = 0; + install_wp(PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, 0); + + GUEST_SYNC(9); + + /* OS Lock blocking single-step */ + reset_debug_state(); + enable_os_lock(); + ss_addr[0] = 0; + install_ss(); + ss_idx = 0; + asm volatile("mrs x0, esr_el1\n\t" + "add x0, x0, #1\n\t" + "msr daifset, #8\n\t" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], 0); + GUEST_DONE(); } @@ -223,7 +277,7 @@ int main(int argc, char *argv[]) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - for (stage = 0; stage < 7; stage++) { + for (stage = 0; stage < 11; stage++) { vcpu_run(vm, VCPU_ID); switch (get_ucall(vm, VCPU_ID, &uc)) { diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index f769fc6cd927..f12147c43464 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -760,6 +760,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(2, 0, 0, 15, 5), ARM64_SYS_REG(2, 0, 0, 15, 6), ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */ ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 7eca97799917..554ca649d470 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args, uint32_t prio, intid, ap1r; int i; - /* Set the priorities of the 
first (KVM_NUM_PRIOS - 1) IRQs + /* + * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs * in descending order, so intid+1 can preempt intid. */ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { @@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args, gic_set_priority(intid, prio); } - /* In a real migration, KVM would restore all GIC state before running + /* + * In a real migration, KVM would restore all GIC state before running * guest code. */ for (i = 0; i < num; i++) { @@ -472,10 +474,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc * guest_restore_active(args, MIN_SPI, 4, f->cmd); } -static void guest_code(struct test_args args) +static void guest_code(struct test_args *args) { - uint32_t i, nr_irqs = args.nr_irqs; - bool level_sensitive = args.level_sensitive; + uint32_t i, nr_irqs = args->nr_irqs; + bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; gic_init(GIC_V3, 1, dist, redist); @@ -484,11 +486,11 @@ static void guest_code(struct test_args args) gic_irq_enable(i); for (i = MIN_SPI; i < nr_irqs; i++) - gic_irq_set_config(i, !args.level_sensitive); + gic_irq_set_config(i, !level_sensitive); - gic_set_eoi_split(args.eoi_split); + gic_set_eoi_split(args->eoi_split); - reset_priorities(&args); + reset_priorities(args); gic_set_priority_mask(CPU_PRIO_MASK); inject_fns = level_sensitive ? inject_level_fns @@ -497,17 +499,18 @@ static void guest_code(struct test_args args) local_irq_enable(); /* Start the tests. */ - for_each_supported_inject_fn(&args, inject_fns, f) { - test_injection(&args, f); - test_preemption(&args, f); - test_injection_failure(&args, f); + for_each_supported_inject_fn(args, inject_fns, f) { + test_injection(args, f); + test_preemption(args, f); + test_injection_failure(args, f); } - /* Restore the active state of IRQs. This would happen when live + /* + * Restore the active state of IRQs. 
This would happen when live * migrating IRQs in the middle of being handled. */ - for_each_supported_activate_fn(&args, set_active_fns, f) - test_restore_active(&args, f); + for_each_supported_activate_fn(args, set_active_fns, f) + test_restore_active(args, f); GUEST_DONE(); } @@ -573,8 +576,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, kvm_gsi_routing_write(vm, routing); } else { ret = _kvm_gsi_routing_write(vm, routing); - /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */ - if (intid >= KVM_IRQCHIP_NUM_PINS) + /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ + if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) TEST_ASSERT(ret != 0 && errno == EINVAL, "Bad intid %u did not cause KVM_SET_GSI_ROUTING " "error: rc: %i errno: %i", intid, ret, errno); @@ -739,6 +742,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) int gic_fd; struct kvm_vm *vm; struct kvm_inject_args inject_args; + vm_vaddr_t args_gva; struct test_args args = { .nr_irqs = nr_irqs, @@ -757,7 +761,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_init_descriptor_tables(vm, VCPU_ID); /* Setup the guest args page (so it gets the args). */ - vcpu_args_set(vm, 0, 1, args); + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vm, 0, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs, GICD_BASE_GPA, GICR_BASE_GPA); @@ -841,7 +847,8 @@ int main(int argc, char **argv) } } - /* If the user just specified nr_irqs and/or gic_version, then run all + /* + * If the user just specified nr_irqs and/or gic_version, then run all * combinations. 
*/ if (default_args) { diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 1954b964d1cf..c9d9e513ca04 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,6 +18,12 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" +#ifdef __aarch64__ +#include "aarch64/vgic.h" + +#define GICD_BASE_GPA 0x8000000ULL +#define GICR_BASE_GPA 0x80A0000ULL +#endif /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -200,6 +206,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } +#ifdef __aarch64__ + vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); +#endif + /* Start the iterations */ iteration = 0; host_quit = false; @@ -298,12 +308,18 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] " + printf("usage: %s [-h] [-i iterations] [-p offset] [-g]" "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" "[-x memslots]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); + printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" + " makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n" + " KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n" + " and writes will be tracked as soon as dirty logging is\n" + " enabled on the memslot (i.e. 
KVM_DIRTY_LOG_INITIALLY_SET\n" + " is not enabled).\n"); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); guest_modes_help(); @@ -343,8 +359,11 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) { switch (opt) { + case 'g': + dirty_log_manual_caps = 0; + break; case 'i': p.iterations = atoi(optarg); break; diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 4ed6aa049a91..92cef0ffb19e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -123,6 +123,7 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); +int kvm_memfd_alloc(size_t size, bool hugepages); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); @@ -147,6 +148,10 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, void vm_create_irqchip(struct kvm_vm *vm); +void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, @@ -336,6 +341,9 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, uint32_t num_percpu_pages, void *guest_code, uint32_t vcpuids[]); +/* Create a default VM without any vcpus. */ +struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages); + /* * Adds a vCPU with reasonable defaults (e.g. 
a stack) * diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index e0e96a5f608c..255c9b990f4c 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -5,6 +5,8 @@ #ifndef SELFTEST_KVM_PROCESSOR_H #define SELFTEST_KVM_PROCESSOR_H +#include <linux/compiler.h> + /* Bits in the region/segment table entry */ #define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */ #define REGION_ENTRY_PROTECT 0x200 /* region protection bit */ @@ -19,4 +21,10 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Is there a portable way to do this? */ +static inline void cpu_relax(void) +{ + barrier(); +} + #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index 0be4757f1f20..ac88557dcc9a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -33,6 +33,7 @@ #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1 << 9) #define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_IRR 0x200 #define APIC_ICR 0x300 #define APIC_DEST_SELF 0x40000 #define APIC_DEST_ALLINC 0x80000 diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index c9af97abd622..cc5d14a45702 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -213,6 +213,25 @@ struct hv_enlightened_vmcs { u64 padding64_6[7]; }; +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) 
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + #define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 @@ -648,381 +667,507 @@ static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) switch (encoding) { case GUEST_RIP: current_evmcs->guest_rip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case GUEST_RSP: current_evmcs->guest_rsp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; break; case GUEST_RFLAGS: current_evmcs->guest_rflags = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; break; case HOST_IA32_PAT: current_evmcs->host_ia32_pat = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_IA32_EFER: current_evmcs->host_ia32_efer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_CR0: current_evmcs->host_cr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_CR3: current_evmcs->host_cr3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_CR4: current_evmcs->host_cr4 = value; + 
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_IA32_SYSENTER_ESP: current_evmcs->host_ia32_sysenter_esp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_IA32_SYSENTER_EIP: current_evmcs->host_ia32_sysenter_eip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_RIP: current_evmcs->host_rip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case IO_BITMAP_A: current_evmcs->io_bitmap_a = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; break; case IO_BITMAP_B: current_evmcs->io_bitmap_b = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP; break; case MSR_BITMAP: current_evmcs->msr_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; break; case GUEST_ES_BASE: current_evmcs->guest_es_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_CS_BASE: current_evmcs->guest_cs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_SS_BASE: current_evmcs->guest_ss_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_DS_BASE: current_evmcs->guest_ds_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_FS_BASE: current_evmcs->guest_fs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GS_BASE: current_evmcs->guest_gs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_LDTR_BASE: current_evmcs->guest_ldtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_TR_BASE: 
current_evmcs->guest_tr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GDTR_BASE: current_evmcs->guest_gdtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_IDTR_BASE: current_evmcs->guest_idtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case TSC_OFFSET: current_evmcs->tsc_offset = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; break; case VIRTUAL_APIC_PAGE_ADDR: current_evmcs->virtual_apic_page_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; break; case VMCS_LINK_POINTER: current_evmcs->vmcs_link_pointer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_IA32_DEBUGCTL: current_evmcs->guest_ia32_debugctl = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_IA32_PAT: current_evmcs->guest_ia32_pat = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_IA32_EFER: current_evmcs->guest_ia32_efer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_PDPTR0: current_evmcs->guest_pdptr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_PDPTR1: current_evmcs->guest_pdptr1 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_PDPTR2: current_evmcs->guest_pdptr2 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_PDPTR3: current_evmcs->guest_pdptr3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_PENDING_DBG_EXCEPTIONS: current_evmcs->guest_pending_dbg_exceptions = value; + 
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_SYSENTER_ESP: current_evmcs->guest_sysenter_esp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_SYSENTER_EIP: current_evmcs->guest_sysenter_eip = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case CR0_GUEST_HOST_MASK: current_evmcs->cr0_guest_host_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case CR4_GUEST_HOST_MASK: current_evmcs->cr4_guest_host_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case CR0_READ_SHADOW: current_evmcs->cr0_read_shadow = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case CR4_READ_SHADOW: current_evmcs->cr4_read_shadow = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case GUEST_CR0: current_evmcs->guest_cr0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case GUEST_CR3: current_evmcs->guest_cr3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case GUEST_CR4: current_evmcs->guest_cr4 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case GUEST_DR7: current_evmcs->guest_dr7 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR; break; case HOST_FS_BASE: current_evmcs->host_fs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case HOST_GS_BASE: current_evmcs->host_gs_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case HOST_TR_BASE: current_evmcs->host_tr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case HOST_GDTR_BASE: current_evmcs->host_gdtr_base = 
value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case HOST_IDTR_BASE: current_evmcs->host_idtr_base = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case HOST_RSP: current_evmcs->host_rsp = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; break; case EPT_POINTER: current_evmcs->ept_pointer = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; break; case GUEST_BNDCFGS: current_evmcs->guest_bndcfgs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case XSS_EXIT_BITMAP: current_evmcs->xss_exit_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2; break; case GUEST_PHYSICAL_ADDRESS: current_evmcs->guest_physical_address = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case EXIT_QUALIFICATION: current_evmcs->exit_qualification = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case GUEST_LINEAR_ADDRESS: current_evmcs->guest_linear_address = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VM_EXIT_MSR_STORE_ADDR: current_evmcs->vm_exit_msr_store_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case VM_EXIT_MSR_LOAD_ADDR: current_evmcs->vm_exit_msr_load_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case VM_ENTRY_MSR_LOAD_ADDR: current_evmcs->vm_entry_msr_load_addr = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case CR3_TARGET_VALUE0: current_evmcs->cr3_target_value0 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case CR3_TARGET_VALUE1: current_evmcs->cr3_target_value1 = value; + current_evmcs->hv_clean_fields &= 
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case CR3_TARGET_VALUE2: current_evmcs->cr3_target_value2 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case CR3_TARGET_VALUE3: current_evmcs->cr3_target_value3 = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case TPR_THRESHOLD: current_evmcs->tpr_threshold = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case GUEST_INTERRUPTIBILITY_INFO: current_evmcs->guest_interruptibility_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC; break; case CPU_BASED_VM_EXEC_CONTROL: current_evmcs->cpu_based_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC; break; case EXCEPTION_BITMAP: current_evmcs->exception_bitmap = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN; break; case VM_ENTRY_CONTROLS: current_evmcs->vm_entry_controls = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY; break; case VM_ENTRY_INTR_INFO_FIELD: current_evmcs->vm_entry_intr_info_field = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; break; case VM_ENTRY_EXCEPTION_ERROR_CODE: current_evmcs->vm_entry_exception_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; break; case VM_ENTRY_INSTRUCTION_LEN: current_evmcs->vm_entry_instruction_len = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT; break; case HOST_IA32_SYSENTER_CS: current_evmcs->host_ia32_sysenter_cs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case PIN_BASED_VM_EXEC_CONTROL: current_evmcs->pin_based_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; break; case 
VM_EXIT_CONTROLS: current_evmcs->vm_exit_controls = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; break; case SECONDARY_VM_EXEC_CONTROL: current_evmcs->secondary_vm_exec_control = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1; break; case GUEST_ES_LIMIT: current_evmcs->guest_es_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_CS_LIMIT: current_evmcs->guest_cs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_SS_LIMIT: current_evmcs->guest_ss_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_DS_LIMIT: current_evmcs->guest_ds_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_FS_LIMIT: current_evmcs->guest_fs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GS_LIMIT: current_evmcs->guest_gs_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_LDTR_LIMIT: current_evmcs->guest_ldtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_TR_LIMIT: current_evmcs->guest_tr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GDTR_LIMIT: current_evmcs->guest_gdtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_IDTR_LIMIT: current_evmcs->guest_idtr_limit = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_ES_AR_BYTES: current_evmcs->guest_es_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_CS_AR_BYTES: 
current_evmcs->guest_cs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_SS_AR_BYTES: current_evmcs->guest_ss_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_DS_AR_BYTES: current_evmcs->guest_ds_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_FS_AR_BYTES: current_evmcs->guest_fs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GS_AR_BYTES: current_evmcs->guest_gs_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_LDTR_AR_BYTES: current_evmcs->guest_ldtr_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_TR_AR_BYTES: current_evmcs->guest_tr_ar_bytes = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_ACTIVITY_STATE: current_evmcs->guest_activity_state = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case GUEST_SYSENTER_CS: current_evmcs->guest_sysenter_cs = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1; break; case VM_INSTRUCTION_ERROR: current_evmcs->vm_instruction_error = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VM_EXIT_REASON: current_evmcs->vm_exit_reason = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VM_EXIT_INTR_INFO: current_evmcs->vm_exit_intr_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VM_EXIT_INTR_ERROR_CODE: current_evmcs->vm_exit_intr_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case IDT_VECTORING_INFO_FIELD: 
current_evmcs->idt_vectoring_info_field = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case IDT_VECTORING_ERROR_CODE: current_evmcs->idt_vectoring_error_code = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VM_EXIT_INSTRUCTION_LEN: current_evmcs->vm_exit_instruction_len = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case VMX_INSTRUCTION_INFO: current_evmcs->vmx_instruction_info = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE; break; case PAGE_FAULT_ERROR_CODE_MASK: current_evmcs->page_fault_error_code_mask = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case PAGE_FAULT_ERROR_CODE_MATCH: current_evmcs->page_fault_error_code_match = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case CR3_TARGET_COUNT: current_evmcs->cr3_target_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case VM_EXIT_MSR_STORE_COUNT: current_evmcs->vm_exit_msr_store_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case VM_EXIT_MSR_LOAD_COUNT: current_evmcs->vm_exit_msr_load_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case VM_ENTRY_MSR_LOAD_COUNT: current_evmcs->vm_entry_msr_load_count = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; break; case HOST_ES_SELECTOR: current_evmcs->host_es_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_CS_SELECTOR: current_evmcs->host_cs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_SS_SELECTOR: current_evmcs->host_ss_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; 
case HOST_DS_SELECTOR: current_evmcs->host_ds_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_FS_SELECTOR: current_evmcs->host_fs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_GS_SELECTOR: current_evmcs->host_gs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case HOST_TR_SELECTOR: current_evmcs->host_tr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; break; case GUEST_ES_SELECTOR: current_evmcs->guest_es_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_CS_SELECTOR: current_evmcs->guest_cs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_SS_SELECTOR: current_evmcs->guest_ss_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_DS_SELECTOR: current_evmcs->guest_ds_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_FS_SELECTOR: current_evmcs->guest_fs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_GS_SELECTOR: current_evmcs->guest_gs_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_LDTR_SELECTOR: current_evmcs->guest_ldtr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case GUEST_TR_SELECTOR: current_evmcs->guest_tr_selector = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2; break; case VIRTUAL_PROCESSOR_ID: current_evmcs->virtual_processor_id = value; + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT; break; default: 
return 1; } @@ -1070,7 +1215,10 @@ static inline int evmcs_vmresume(void) { int ret; - current_evmcs->hv_clean_fields = 0; + /* HOST_RIP */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1; + /* HOST_RSP */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER; __asm__ __volatile__("push %%rbp;" "push %%rcx;" diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8a470da7b71a..37db341d4cc5 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -363,6 +363,11 @@ static inline unsigned long get_xmm(int n) return 0; } +static inline void cpu_relax(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + bool is_intel_cpu(void); bool is_amd_cpu(void); diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index f4ea2355dbc2..2225e5077350 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -99,7 +99,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; + u8 reserved_7[8]; + u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[720]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. 
+ */ + u8 reserved_sw[32]; }; diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index 587fbe408b99..a25aabd8f5e7 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -16,6 +16,7 @@ #define CPUID_SVM_BIT 2 #define CPUID_SVM BIT_ULL(CPUID_SVM_BIT) +#define SVM_EXIT_MSR 0x07c #define SVM_EXIT_VMMCALL 0x081 struct svm_test_data { @@ -28,6 +29,11 @@ struct svm_test_data { struct vmcb_save_area *save_area; /* gva */ void *save_area_hva; uint64_t save_area_gpa; + + /* MSR-Bitmap */ + void *msr; /* gva */ + void *msr_hva; + uint64_t msr_gpa; }; struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 00f613c0583c..263bf3ed8fd5 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -19,7 +19,7 @@ struct gicv3_data { unsigned int nr_spis; }; -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) #define DIST_BIT (1U << 31) enum gicv3_intid_range { @@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split) { uint32_t val; - /* All other fields are read-only, so no need to read CTLR first. In + /* + * All other fields are read-only, so no need to read CTLR first. In * fact, the kernel does the same. */ val = split ? (1U << 1) : 0; @@ -159,9 +160,10 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, uint32_t cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); - GUEST_ASSERT(*val < (1U << bits_per_field)); - /* Some registers like IROUTER are 64 bit long. Those are currently not - * supported by readl nor writel, so just asserting here until then. 
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field)); + /* + * This function does not support 64 bit accesses. Just asserting here + * until we implement readq/writeq. */ GUEST_ASSERT(reg_bits == 32); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index f5cd0c536d85..5d45046c1b80 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -140,9 +140,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, uint64_t val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - /* Check that the addr part of the attr is within 32 bits. */ - assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; @@ -152,7 +149,11 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, attr += SZ_64K; } - /* All calls will succeed, even with invalid intid's, as long as the + /* Check that the addr part of the attr is within 32 bits. */ + assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0); + + /* + * All calls will succeed, even with invalid intid's, as long as the * addr part of the attr is within 32 bits (checked above). An invalid * intid will just make the read/writes point to above the intended * register space (i.e., ICPENDR after ISPENDR). 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d8cf851ab119..1665a220abcb 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -362,6 +362,20 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } +struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages) +{ + struct kvm_vm *vm; + + vm = vm_create(mode, pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name); + +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + return vm; +} + /* * VM Create with customized parameters * @@ -412,13 +426,8 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); pages = vm_adjust_num_guest_pages(mode, pages); - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name); - -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + vm = vm_create_without_vcpus(mode, pages); for (i = 0; i < nr_vcpus; ++i) { uint32_t vcpuid = vcpuids ? 
vcpuids[i] : i; @@ -709,6 +718,27 @@ void kvm_vm_free(struct kvm_vm *vmp) free(vmp); } +int kvm_memfd_alloc(size_t size, bool hugepages) +{ + int memfd_flags = MFD_CLOEXEC; + int fd, r; + + if (hugepages) + memfd_flags |= MFD_HUGETLB; + + fd = memfd_create("kvm_selftest", memfd_flags); + TEST_ASSERT(fd != -1, "memfd_create() failed, errno: %i (%s)", + errno, strerror(errno)); + + r = ftruncate(fd, size); + TEST_ASSERT(!r, "ftruncate() failed, errno: %i (%s)", errno, strerror(errno)); + + r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); + TEST_ASSERT(!r, "fallocate() failed, errno: %i (%s)", errno, strerror(errno)); + + return fd; +} + /* * Memory Compare, host virtual to guest virtual * @@ -830,6 +860,30 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, rb_insert_color(&region->hva_node, hva_tree); } + +int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva) +{ + struct kvm_userspace_memory_region region = { + .slot = slot, + .flags = flags, + .guest_phys_addr = gpa, + .memory_size = size, + .userspace_addr = (uintptr_t)hva, + }; + + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region); +} + +void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva) +{ + int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); + + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)", + errno, strerror(errno)); +} + /* * VM Userspace Memory Region Add * @@ -937,24 +991,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->mmap_size += alignment; region->fd = -1; - if (backing_src_is_shared(src_type)) { - int memfd_flags = MFD_CLOEXEC; - - if (src_type == VM_MEM_SRC_SHARED_HUGETLB) - memfd_flags |= MFD_HUGETLB; - - region->fd = memfd_create("kvm_selftest", memfd_flags); - TEST_ASSERT(region->fd != -1, - "memfd_create failed, errno: %i", errno); - - ret = 
ftruncate(region->fd, region->mmap_size); - TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno); - - ret = fallocate(region->fd, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, - region->mmap_size); - TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno); - } + if (backing_src_is_shared(src_type)) + region->fd = kvm_memfd_alloc(region->mmap_size, + src_type == VM_MEM_SRC_SHARED_HUGETLB); region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 0ebc03ce079c..736ee4a23df6 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -43,6 +43,11 @@ vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); + svm->msr = (void *)vm_vaddr_alloc_page(vm); + svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); + svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); + memset(svm->msr_hva, 0, getpagesize()); + *p_svm_gva = svm_gva; return svm; } @@ -106,6 +111,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | (1ULL << INTERCEPT_VMMCALL); + ctrl->msrpm_base_pa = svm->msr_gpa; vmcb->save.rip = (u64)guest_rip; vmcb->save.rsp = (u64)guest_rsp; diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c new file mode 100644 index 000000000000..3875c4b23a04 --- /dev/null +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <semaphore.h> +#include <sys/types.h> +#include <signal.h> +#include <errno.h> +#include <linux/bitmap.h> 
+#include <linux/bitops.h> +#include <linux/atomic.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" +#include "processor.h" + +static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) +{ + uint64_t gpa; + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa) = gpa; + + GUEST_DONE(); +} + +struct vcpu_info { + struct kvm_vm *vm; + uint32_t id; + uint64_t start_gpa; + uint64_t end_gpa; +}; + +static int nr_vcpus; +static atomic_t rendezvous; + +static void rendezvous_with_boss(void) +{ + int orig = atomic_read(&rendezvous); + + if (orig > 0) { + atomic_dec_and_test(&rendezvous); + while (atomic_read(&rendezvous) > 0) + cpu_relax(); + } else { + atomic_inc(&rendezvous); + while (atomic_read(&rendezvous) < 0) + cpu_relax(); + } +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id) +{ + vcpu_run(vm, vcpu_id); + ASSERT_EQ(get_ucall(vm, vcpu_id, NULL), UCALL_DONE); +} + +static void *vcpu_worker(void *data) +{ + struct vcpu_info *vcpu = data; + struct kvm_vm *vm = vcpu->vm; + struct kvm_sregs sregs; + struct kvm_regs regs; + + vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa, + vm_get_page_size(vm)); + + /* Snapshot regs before the first run. */ + vcpu_regs_get(vm, vcpu->id, &regs); + rendezvous_with_boss(); + + run_vcpu(vm, vcpu->id); + rendezvous_with_boss(); + vcpu_regs_set(vm, vcpu->id, &regs); + vcpu_sregs_get(vm, vcpu->id, &sregs); +#ifdef __x86_64__ + /* Toggle CR0.WP to trigger a MMU context reset. 
*/ + sregs.cr0 ^= X86_CR0_WP; +#endif + vcpu_sregs_set(vm, vcpu->id, &sregs); + rendezvous_with_boss(); + + run_vcpu(vm, vcpu->id); + rendezvous_with_boss(); + + return NULL; +} + +static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa, + uint64_t end_gpa) +{ + struct vcpu_info *info; + uint64_t gpa, nr_bytes; + pthread_t *threads; + int i; + + threads = malloc(nr_vcpus * sizeof(*threads)); + TEST_ASSERT(threads, "Failed to allocate vCPU threads"); + + info = malloc(nr_vcpus * sizeof(*info)); + TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); + + nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & + ~((uint64_t)vm_get_page_size(vm) - 1); + TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); + + for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { + info[i].vm = vm; + info[i].id = i; + info[i].start_gpa = gpa; + info[i].end_gpa = gpa + nr_bytes; + pthread_create(&threads[i], NULL, vcpu_worker, &info[i]); + } + return threads; +} + +static void rendezvous_with_vcpus(struct timespec *time, const char *name) +{ + int i, rendezvoused; + + pr_info("Waiting for vCPUs to finish %s...\n", name); + + rendezvoused = atomic_read(&rendezvous); + for (i = 0; abs(rendezvoused) != 1; i++) { + usleep(100); + if (!(i & 0x3f)) + pr_info("\r%d vCPUs haven't rendezvoused...", + abs(rendezvoused) - 1); + rendezvoused = atomic_read(&rendezvous); + } + + clock_gettime(CLOCK_MONOTONIC, time); + + /* Release the vCPUs after getting the time of the previous action. 
*/ + pr_info("\rAll vCPUs finished %s, releasing...\n", name); + if (rendezvoused > 0) + atomic_set(&rendezvous, -nr_vcpus - 1); + else + atomic_set(&rendezvous, nr_vcpus + 1); +} + +static void calc_default_nr_vcpus(void) +{ + cpu_set_t possible_mask; + int r; + + r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); + TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", + errno, strerror(errno)); + + nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); +} + +int main(int argc, char *argv[]) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables. Because selftests creates + * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot + * just below the 4gb boundary. This test could create memory at + * 1gb-3gb,but it's simpler to skip straight to 4gb. + */ + const uint64_t size_1gb = (1 << 30); + const uint64_t start_gpa = (4ull * size_1gb); + const int first_slot = 1; + + struct timespec time_start, time_run1, time_reset, time_run2; + uint64_t max_gpa, gpa, slot_size, max_mem, i; + int max_slots, slot, opt, fd; + bool hugepages = false; + pthread_t *threads; + struct kvm_vm *vm; + void *mem; + + /* + * Default to 2gb so that maxing out systems with MAXPHADDR=46, which + * are quite common for x86, requires changing only max_mem (KVM allows + * 32k memslots, 32k * 2gb == ~64tb of guest memory). + */ + slot_size = 2 * size_1gb; + + max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + TEST_ASSERT(max_slots > first_slot, "KVM is broken"); + + /* All KVM MMUs should be able to survive a 128gb guest. 
*/ + max_mem = 128 * size_1gb; + + calc_default_nr_vcpus(); + + while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { + switch (opt) { + case 'c': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); + break; + case 'm': + max_mem = atoi(optarg) * size_1gb; + TEST_ASSERT(max_mem > 0, "memory size must be >0"); + break; + case 's': + slot_size = atoi(optarg) * size_1gb; + TEST_ASSERT(slot_size > 0, "slot size must be >0"); + break; + case 'H': + hugepages = true; + break; + case 'h': + default: + printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]); + exit(1); + } + } + + vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); + + max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm); + TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); + + fd = kvm_memfd_alloc(slot_size, hugepages); + mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + + TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); + + /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ + for (i = 0; i < slot_size; i += vm_get_page_size(vm)) + ((uint8_t *)mem)[i] = 0xaa; + + gpa = 0; + for (slot = first_slot; slot < max_slots; slot++) { + gpa = start_gpa + ((slot - first_slot) * slot_size); + if (gpa + slot_size > max_gpa) + break; + + if ((gpa - start_gpa) >= max_mem) + break; + + vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem); + +#ifdef __x86_64__ + /* Identity map memory in the guest using 1gb pages. 
*/ + for (i = 0; i < slot_size; i += size_1gb) + __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G); +#else + for (i = 0; i < slot_size; i += vm_get_page_size(vm)) + virt_pg_map(vm, gpa + i, gpa + i); +#endif + } + + atomic_set(&rendezvous, nr_vcpus + 1); + threads = spawn_workers(vm, start_gpa, gpa); + + pr_info("Running with %lugb of guest memory and %u vCPUs\n", + (gpa - start_gpa) / size_1gb, nr_vcpus); + + rendezvous_with_vcpus(&time_start, "spawning"); + rendezvous_with_vcpus(&time_run1, "run 1"); + rendezvous_with_vcpus(&time_reset, "reset"); + rendezvous_with_vcpus(&time_run2, "run 2"); + + time_run2 = timespec_sub(time_run2, time_reset); + time_reset = timespec_sub(time_reset, time_run1); + time_run1 = timespec_sub(time_run1, time_start); + + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + time_run1.tv_sec, time_run1.tv_nsec, + time_reset.tv_sec, time_reset.tv_nsec, + time_run2.tv_sec, time_run2.tv_nsec); + + /* + * Delete even numbered slots (arbitrary) and unmap the first half of + * the backing (also arbitrary) to verify KVM correctly drops all + * references to the removed regions. + */ + for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) + vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); + + munmap(mem, slot_size / 2); + + /* Sanity check that the vCPUs actually ran. */ + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + /* + * Deliberately exit without deleting the remaining memslots or closing + * kvm_fd to test cleanup via mmu_notifier.release. 
+ */ +} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9f49ead380ab..b04c2c1b3c30 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -13,154 +13,668 @@ #include "test_util.h" #include "kvm_util.h" +enum mop_target { + LOGICAL, + SIDA, + ABSOLUTE, + INVALID, +}; + +enum mop_access_mode { + READ, + WRITE, +}; + +struct mop_desc { + uintptr_t gaddr; + uintptr_t gaddr_v; + uint64_t set_flags; + unsigned int f_check : 1; + unsigned int f_inject : 1; + unsigned int f_key : 1; + unsigned int _gaddr_v : 1; + unsigned int _set_flags : 1; + unsigned int _sida_offset : 1; + unsigned int _ar : 1; + uint32_t size; + enum mop_target target; + enum mop_access_mode mode; + void *buf; + uint32_t sida_offset; + uint8_t ar; + uint8_t key; +}; + +static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc) +{ + struct kvm_s390_mem_op ksmo = { + .gaddr = (uintptr_t)desc.gaddr, + .size = desc.size, + .buf = ((uintptr_t)desc.buf), + .reserved = "ignored_ignored_ignored_ignored" + }; + + switch (desc.target) { + case LOGICAL: + if (desc.mode == READ) + ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; + if (desc.mode == WRITE) + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + break; + case SIDA: + if (desc.mode == READ) + ksmo.op = KVM_S390_MEMOP_SIDA_READ; + if (desc.mode == WRITE) + ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; + break; + case ABSOLUTE: + if (desc.mode == READ) + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; + if (desc.mode == WRITE) + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; + break; + case INVALID: + ksmo.op = -1; + } + if (desc.f_check) + ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + if (desc.f_inject) + ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; + if (desc._set_flags) + ksmo.flags = desc.set_flags; + if (desc.f_key) { + ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; + ksmo.key = desc.key; + } + if (desc._ar) + ksmo.ar = desc.ar; + else + ksmo.ar = 0; + if (desc._sida_offset) 
+ ksmo.sida_offset = desc.sida_offset; + + return ksmo; +} + +/* vcpu dummy id signifying that vm instead of vcpu ioctl is to occur */ +const uint32_t VM_VCPU_ID = (uint32_t)-1; + +struct test_vcpu { + struct kvm_vm *vm; + uint32_t id; +}; + +#define PRINT_MEMOP false +static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo) +{ + if (!PRINT_MEMOP) + return; + + if (vcpu_id == VM_VCPU_ID) + printf("vm memop("); + else + printf("vcpu memop("); + switch (ksmo->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + printf("LOGICAL, READ, "); + break; + case KVM_S390_MEMOP_LOGICAL_WRITE: + printf("LOGICAL, WRITE, "); + break; + case KVM_S390_MEMOP_SIDA_READ: + printf("SIDA, READ, "); + break; + case KVM_S390_MEMOP_SIDA_WRITE: + printf("SIDA, WRITE, "); + break; + case KVM_S390_MEMOP_ABSOLUTE_READ: + printf("ABSOLUTE, READ, "); + break; + case KVM_S390_MEMOP_ABSOLUTE_WRITE: + printf("ABSOLUTE, WRITE, "); + break; + } + printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); + if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) + printf(", CHECK_ONLY"); + if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) + printf(", INJECT_EXCEPTION"); + if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) + printf(", SKEY_PROTECTION"); + puts(")"); +} + +static void memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) +{ + if (vcpu.id == VM_VCPU_ID) + vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); + else + vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); +} + +static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) +{ + if (vcpu.id == VM_VCPU_ID) + return _vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); + else + return _vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); +} + +#define MEMOP(err, vcpu_p, mop_target_p, access_mode_p, buf_p, size_p, ...) 
\ +({ \ + struct test_vcpu __vcpu = (vcpu_p); \ + struct mop_desc __desc = { \ + .target = (mop_target_p), \ + .mode = (access_mode_p), \ + .buf = (buf_p), \ + .size = (size_p), \ + __VA_ARGS__ \ + }; \ + struct kvm_s390_mem_op __ksmo; \ + \ + if (__desc._gaddr_v) { \ + if (__desc.target == ABSOLUTE) \ + __desc.gaddr = addr_gva2gpa(__vcpu.vm, __desc.gaddr_v); \ + else \ + __desc.gaddr = __desc.gaddr_v; \ + } \ + __ksmo = ksmo_from_desc(__desc); \ + print_memop(__vcpu.id, &__ksmo); \ + err##memop_ioctl(__vcpu, &__ksmo); \ +}) + +#define MOP(...) MEMOP(, __VA_ARGS__) +#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__) + +#define GADDR(a) .gaddr = ((uintptr_t)a) +#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v) +#define CHECK_ONLY .f_check = 1 +#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f) +#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) +#define AR(a) ._ar = 1, .ar = (a) +#define KEY(a) .f_key = 1, .key = (a) + +#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) + #define VCPU_ID 1 +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) +#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) +#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) static uint8_t mem1[65536]; static uint8_t mem2[65536]; -static void guest_code(void) +struct test_default { + struct kvm_vm *kvm_vm; + struct test_vcpu vm; + struct test_vcpu vcpu; + struct kvm_run *run; + int size; +}; + +static struct test_default test_default_init(void *guest_code) +{ + struct test_default t; + + t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); + t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code); + t.vm = (struct test_vcpu) { t.kvm_vm, VM_VCPU_ID }; + t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID }; + t.run = vcpu_state(t.kvm_vm, VCPU_ID); + return t; +} + +enum stage { + /* Synced state set by host, e.g. 
DAT */ + STAGE_INITED, + /* Guest did nothing */ + STAGE_IDLED, + /* Guest set storage keys (specifics up to test case) */ + STAGE_SKEYS_SET, + /* Guest copied memory (locations up to test case) */ + STAGE_COPIED, +}; + +#define HOST_SYNC(vcpu_p, stage) \ +({ \ + struct test_vcpu __vcpu = (vcpu_p); \ + struct ucall uc; \ + int __stage = (stage); \ + \ + vcpu_run(__vcpu.vm, __vcpu.id); \ + get_ucall(__vcpu.vm, __vcpu.id, &uc); \ + ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + ASSERT_EQ(uc.args[1], __stage); \ +}) \ + +static void prepare_mem12(void) { int i; + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = rand(); + memset(mem2, 0xaa, sizeof(mem2)); +} + +#define ASSERT_MEM_EQ(p1, p2, size) \ + TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") + +#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ +({ \ + struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ + enum mop_target __target = (mop_target_p); \ + uint32_t __size = (size); \ + \ + prepare_mem12(); \ + CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ + GADDR_V(mem1), ##__VA_ARGS__); \ + HOST_SYNC(__copy_cpu, STAGE_COPIED); \ + CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, \ + GADDR_V(mem2), ##__VA_ARGS__); \ + ASSERT_MEM_EQ(mem1, mem2, __size); \ +}) + +#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) 
\ +({ \ + struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ + enum mop_target __target = (mop_target_p); \ + uint32_t __size = (size); \ + \ + prepare_mem12(); \ + CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ + GADDR_V(mem1)); \ + HOST_SYNC(__copy_cpu, STAGE_COPIED); \ + CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, ##__VA_ARGS__);\ + ASSERT_MEM_EQ(mem1, mem2, __size); \ +}) + +static void guest_copy(void) +{ + GUEST_SYNC(STAGE_INITED); + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); +} + +static void test_copy(void) +{ + struct test_default t = test_default_init(guest_copy); + + HOST_SYNC(t.vcpu, STAGE_INITED); + + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size); + + kvm_vm_free(t.kvm_vm); +} + +static void set_storage_key_range(void *addr, size_t len, uint8_t key) +{ + uintptr_t _addr, abs, i; + int not_mapped = 0; + + _addr = (uintptr_t)addr; + for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) { + abs = i; + asm volatile ( + "lra %[abs], 0(0,%[abs])\n" + " jz 0f\n" + " llill %[not_mapped],1\n" + " j 1f\n" + "0: sske %[key], %[abs]\n" + "1:" + : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped) + : [key] "r" (key) + : "cc" + ); + GUEST_ASSERT_EQ(not_mapped, 0); + } +} + +static void guest_copy_key(void) +{ + set_storage_key_range(mem1, sizeof(mem1), 0x90); + set_storage_key_range(mem2, sizeof(mem2), 0x90); + GUEST_SYNC(STAGE_SKEYS_SET); + for (;;) { - for (i = 0; i < sizeof(mem2); i++) - mem2[i] = mem1[i]; - GUEST_SYNC(0); + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); } } -int main(int argc, char *argv[]) +static void test_copy_key(void) { - struct kvm_vm *vm; - struct kvm_run *run; - struct kvm_s390_mem_op ksmo; - int rv, i, maxsize; + struct test_default t = test_default_init(guest_copy_key); - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP); - if (!maxsize) { - 
print_skip("CAP_S390_MEM_OP not supported"); - exit(KSFT_SKIP); + /* vm, no key */ + DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size); + + /* vm/vcpu, matching key or key 0 */ + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(0)); + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(9)); + DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(0)); + DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(9)); + /* + * There used to be different code paths for key handling depending on + * if the region crossed a page boundary. + * There currently are not, but the more tests the merrier. + */ + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(0)); + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(9)); + DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(0)); + DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(9)); + + /* vm/vcpu, mismatching keys on read, but no fetch protection */ + DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(2)); + DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem1), KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +static void guest_copy_key_fetch_prot(void) +{ + /* + * For some reason combining the first sync with override enablement + * results in an exception when calling HOST_SYNC. + */ + GUEST_SYNC(STAGE_INITED); + /* Storage protection override applies to both store and fetch. 
*/ + set_storage_key_range(mem1, sizeof(mem1), 0x98); + set_storage_key_range(mem2, sizeof(mem2), 0x98); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (;;) { + memcpy(&mem2, &mem1, sizeof(mem2)); + GUEST_SYNC(STAGE_COPIED); } - if (maxsize > sizeof(mem1)) - maxsize = sizeof(mem1); +} - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); +static void test_copy_key_storage_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); - for (i = 0; i < sizeof(mem1); i++) - mem1[i] = i * i + i; - - /* Set the first array */ - ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1); - ksmo.flags = 0; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); - - /* Let the guest code copy the first array to the second */ - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, - "Unexpected exit reason: %u (%s)\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); - memset(mem2, 0xaa, sizeof(mem2)); + /* vcpu, mismatching keys, storage protection override in effect */ + DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +static void test_copy_key_fetch_prot(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); - /* Get the second array */ - ksmo.gaddr = (uintptr_t)mem2; - ksmo.flags = 0; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; - ksmo.buf = (uintptr_t)mem2; - ksmo.ar = 0; - vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); - - TEST_ASSERT(!memcmp(mem1, mem2, maxsize), - "Memory contents do not match!"); - - /* Check error conditions - first bad size: */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = 0; - ksmo.size = 
-1; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm/vcpu, matching key, fetch protection in effect */ + DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(9)); + DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem2), KEY(9)); + + kvm_vm_free(t.kvm_vm); +} + +#define ERR_PROT_MOP(...) \ +({ \ + int rv; \ + \ + rv = ERR_MOP(__VA_ARGS__); \ + TEST_ASSERT(rv == 4, "Should result in protection exception"); \ +}) + +static void test_errors_key(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm/vcpu, mismatching keys, fetch protection in effect */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_storage_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vm, mismatching keys, storage protection override not applicable to vm */ + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + + kvm_vm_free(t.kvm_vm); +} + +const uint64_t last_page_addr = -PAGE_SIZE; + +static void guest_copy_key_fetch_prot_override(void) +{ + int i; + char *page_0 = 0; + + 
GUEST_SYNC(STAGE_INITED); + set_storage_key_range(0, PAGE_SIZE, 0x18); + set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0); + asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0), [key] "r"(0x18) : "cc"); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (;;) { + for (i = 0; i < PAGE_SIZE; i++) + page_0[i] = mem1[i]; + GUEST_SYNC(STAGE_COPIED); + } +} + +static void test_copy_key_fetch_prot_override(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys on fetch, fetch protection override applies */ + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); + ASSERT_MEM_EQ(mem1, mem2, 2048); + + /* + * vcpu, mismatching keys on fetch, fetch protection override applies, + * wraparound + */ + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048, + GADDR_V(guest_last_page), KEY(2)); + ASSERT_MEM_EQ(mem1, mem2, 2048); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_fetch_prot_override_not_enabled(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = 
vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* vcpu, mismatching keys on fetch, fetch protection override not enabled */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2)); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void test_errors_key_fetch_prot_override_enabled(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); + vm_vaddr_t guest_0_page, guest_last_page; + + guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + if (guest_0_page != 0 || guest_last_page != last_page_addr) { + print_skip("did not allocate guest pages at required positions"); + goto out; + } + HOST_SYNC(t.vcpu, STAGE_INITED); + t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + t.run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + /* + * vcpu, mismatching keys on fetch, + * fetch protection override does not apply because memory range exceeded + */ + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1, + GADDR_V(guest_last_page), KEY(2)); + /* vm, fetch protected override does not apply */ + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); + +out: + kvm_vm_free(t.kvm_vm); +} + +static void guest_idle(void) +{ + GUEST_SYNC(STAGE_INITED); /* for consistency's sake */ + for (;;) + GUEST_SYNC(STAGE_IDLED); +} + +static void _test_errors_common(struct test_vcpu vcpu, enum mop_target target, int size) +{ + int rv; + + /* Bad size: */ + rv = ERR_MOP(vcpu, target, WRITE, 
mem1, -1, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); /* Zero size: */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = 0; - ksmo.size = 0; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + rv = ERR_MOP(vcpu, target, WRITE, mem1, 0, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), "ioctl allows 0 as size"); /* Bad flags: */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = -1; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1)); TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); - /* Bad operation: */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = 0; - ksmo.size = maxsize; - ksmo.op = -1; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); - TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); - /* Bad guest address: */ - ksmo.gaddr = ~0xfffUL; - ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); /* Bad host address: */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = 0; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = 0; - ksmo.ar = 0; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + rv = ERR_MOP(vcpu, target, WRITE, 0, size, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && errno == EFAULT, "ioctl does not report bad host memory address"); + /* Bad key: */ + rv = ERR_MOP(vcpu, target, WRITE, 
mem1, size, GADDR_V(mem1), KEY(17)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key"); +} + +static void test_errors(void) +{ + struct test_default t = test_default_init(guest_idle); + int rv; + + HOST_SYNC(t.vcpu, STAGE_INITED); + + _test_errors_common(t.vcpu, LOGICAL, t.size); + _test_errors_common(t.vm, ABSOLUTE, t.size); + + /* Bad operation: */ + rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + /* virtual addresses are not translated when passing INVALID */ + rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + /* Bad access register: */ - run->psw_mask &= ~(3UL << (63 - 17)); - run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ - vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */ - ksmo.gaddr = (uintptr_t)mem1; - ksmo.flags = 0; - ksmo.size = maxsize; - ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; - ksmo.buf = (uintptr_t)mem1; - ksmo.ar = 17; - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */ + rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17)); TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); - run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ - vcpu_run(vm, VCPU_ID); /* Run to sync new state */ + t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ + HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */ + + /* Check that the SIDA calls are rejected for non-protected guests */ + rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_READ in non-protected mode"); + rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), 
SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_WRITE in non-protected mode"); + + kvm_vm_free(t.kvm_vm); +} - kvm_vm_free(vm); +int main(int argc, char *argv[]) +{ + int memop_cap, extension_cap; + + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + + memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP); + extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); + if (!memop_cap) { + print_skip("CAP_S390_MEM_OP not supported"); + exit(KSFT_SKIP); + } + + test_copy(); + if (extension_cap > 0) { + test_copy_key(); + test_copy_key_storage_prot_override(); + test_copy_key_fetch_prot(); + test_copy_key_fetch_prot_override(); + test_errors_key(); + test_errors_key_storage_prot_override(); + test_errors_key_fetch_prot_override_not_enabled(); + test_errors_key_fetch_prot_override_enabled(); + } else { + print_skip("storage key memop extension not supported"); + } + test_errors(); return 0; } diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c new file mode 100644 index 000000000000..c097b9db495e --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test TEST PROTECTION emulation. + * + * Copyright IBM Corp. 
2021 + */ + +#include <sys/mman.h> +#include "test_util.h" +#include "kvm_util.h" + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) +#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) + +#define VCPU_ID 1 + +static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; +static uint8_t *const page_store_prot = pages[0]; +static uint8_t *const page_fetch_prot = pages[1]; + +/* Nonzero return value indicates that address not mapped */ +static int set_storage_key(void *addr, uint8_t key) +{ + int not_mapped = 0; + + asm volatile ( + "lra %[addr], 0(0,%[addr])\n" + " jz 0f\n" + " llill %[not_mapped],1\n" + " j 1f\n" + "0: sske %[key], %[addr]\n" + "1:" + : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped) + : [key] "r" (key) + : "cc" + ); + return -not_mapped; +} + +enum permission { + READ_WRITE = 0, + READ = 1, + RW_PROTECTED = 2, + TRANSL_UNAVAIL = 3, +}; + +static enum permission test_protection(void *addr, uint8_t key) +{ + uint64_t mask; + + asm volatile ( + "tprot %[addr], 0(%[key])\n" + " ipm %[mask]\n" + : [mask] "=r" (mask) + : [addr] "Q" (*(char *)addr), + [key] "a" (key) + : "cc" + ); + + return (enum permission)(mask >> 28); +} + +enum stage { + STAGE_END, + STAGE_INIT_SIMPLE, + TEST_SIMPLE, + STAGE_INIT_FETCH_PROT_OVERRIDE, + TEST_FETCH_PROT_OVERRIDE, + TEST_STORAGE_PROT_OVERRIDE, +}; + +struct test { + enum stage stage; + void *addr; + uint8_t key; + enum permission expected; +} tests[] = { + /* + * We perform each test in the array by executing TEST PROTECTION on + * the specified addr with the specified key and checking if the returned + * permissions match the expected value. + * Both guest and host cooperate to set up the required test conditions. + * A central condition is that the page targeted by addr has to be DAT + * protected in the host mappings, in order for KVM to emulate the + * TEST PROTECTION instruction. 
+ * Since the page tables are shared, the host uses mprotect to achieve + * this. + * + * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted + * by SIE, not KVM, but there is no harm in testing them also. + * See Enhanced Suppression-on-Protection Facilities in the + * Interpretive-Execution Mode + */ + /* + * guest: set storage key of page_store_prot to 1 + * storage key of page_fetch_prot to 9 and enable + * protection for it + * STAGE_INIT_SIMPLE + * host: write protect both via mprotect + */ + /* access key 0 matches any storage key -> RW */ + { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE }, + /* access key matches storage key -> RW */ + { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE }, + /* mismatched keys, but no fetch protection -> RO */ + { TEST_SIMPLE, page_store_prot, 0x20, READ }, + /* access key 0 matches any storage key -> RW */ + { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE }, + /* access key matches storage key -> RW */ + { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE }, + /* mismatched keys, fetch protection -> inaccessible */ + { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED }, + /* page 0 not mapped yet -> translation not available */ + { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL }, + /* + * host: try to map page 0 + * guest: set storage key of page 0 to 9 and enable fetch protection + * STAGE_INIT_FETCH_PROT_OVERRIDE + * host: write protect page 0 + * enable fetch protection override + */ + /* mismatched keys, fetch protection, but override applies -> RO */ + { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ }, + /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */ + { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED }, + /* + * host: enable storage protection override + */ + /* mismatched keys, but override applies (storage key 9) -> RW */ + { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE }, + /* mismatched keys, no fetch protection, override 
doesn't apply -> RO */ + { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ }, + /* mismatched keys, but override applies (storage key 9) -> RW */ + { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE }, + /* end marker */ + { STAGE_END, 0, 0, 0 }, +}; + +static enum stage perform_next_stage(int *i, bool mapped_0) +{ + enum stage stage = tests[*i].stage; + enum permission result; + bool skip; + + for (; tests[*i].stage == stage; (*i)++) { + /* + * Some fetch protection override tests require that page 0 + * be mapped, however, when the host tries to map that page via + * vm_vaddr_alloc, it may happen that some other page gets mapped + * instead. + * In order to skip these tests we detect this inside the guest + */ + skip = tests[*i].addr < (void *)4096 && + tests[*i].expected != TRANSL_UNAVAIL && + !mapped_0; + if (!skip) { + result = test_protection(tests[*i].addr, tests[*i].key); + GUEST_ASSERT_2(result == tests[*i].expected, *i, result); + } + } + return stage; +} + +static void guest_code(void) +{ + bool mapped_0; + int i = 0; + + GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0); + GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0); + GUEST_SYNC(STAGE_INIT_SIMPLE); + GUEST_SYNC(perform_next_stage(&i, false)); + + /* Fetch-protection override */ + mapped_0 = !set_storage_key((void *)0, 0x98); + GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE); + GUEST_SYNC(perform_next_stage(&i, mapped_0)); + + /* Storage-protection override */ + GUEST_SYNC(perform_next_stage(&i, mapped_0)); +} + +#define HOST_SYNC(vmp, stage) \ +({ \ + struct kvm_vm *__vm = (vmp); \ + struct ucall uc; \ + int __stage = (stage); \ + \ + vcpu_run(__vm, VCPU_ID); \ + get_ucall(__vm, VCPU_ID, &uc); \ + if (uc.cmd == UCALL_ABORT) { \ + TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \ + (const char *)uc.args[0], uc.args[2], uc.args[3]); \ + } \ + ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + ASSERT_EQ(uc.args[1], __stage); \ +}) + +int main(int argc, char *argv[]) +{ + 
struct kvm_vm *vm; + struct kvm_run *run; + vm_vaddr_t guest_0_page; + + vm = vm_create_default(VCPU_ID, 0, guest_code); + run = vcpu_state(vm, VCPU_ID); + + HOST_SYNC(vm, STAGE_INIT_SIMPLE); + mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); + HOST_SYNC(vm, TEST_SIMPLE); + + guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); + if (guest_0_page != 0) + print_skip("Did not allocate page at 0 for fetch protection override tests"); + HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE); + if (guest_0_page == 0) + mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); + run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; + run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE); + + run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; + run->kvm_dirty_regs = KVM_SYNC_CRS; + HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE); +} diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 72a1c9b4882c..73bc297dabe6 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -329,22 +329,6 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ -static int test_memory_region_add(struct kvm_vm *vm, void *mem, uint32_t slot, - uint32_t size, uint64_t guest_addr) -{ - struct kvm_userspace_memory_region region; - int ret; - - region.slot = slot; - region.flags = 0; - region.guest_phys_addr = guest_addr; - region.memory_size = size; - region.userspace_addr = (uintptr_t) mem; - ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, &region); - - return ret; -} - /* * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any * tentative to add further slots should fail. 
@@ -382,23 +366,20 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); - for (slot = 0; slot < max_mem_slots; slot++) { - ret = test_memory_region_add(vm, mem_aligned + - ((uint64_t)slot * MEM_REGION_SIZE), - slot, MEM_REGION_SIZE, - (uint64_t)slot * MEM_REGION_SIZE); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" - " rc: %i errno: %i slot: %i\n", - ret, errno, slot); - } + for (slot = 0; slot < max_mem_slots; slot++) + vm_set_user_memory_region(vm, slot, 0, + ((uint64_t)slot * MEM_REGION_SIZE), + MEM_REGION_SIZE, + mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); - ret = test_memory_region_add(vm, mem_extra, max_mem_slots, MEM_REGION_SIZE, - (uint64_t)max_mem_slots * MEM_REGION_SIZE); + ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, + (uint64_t)max_mem_slots * MEM_REGION_SIZE, + MEM_REGION_SIZE, mem_extra); TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 4c7841dfd481..d12e043aa2ee 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <linux/bitmap.h> #include "test_util.h" @@ -32,6 +33,22 @@ static void guest_nmi_handler(struct ex_regs *regs) { } +/* Exits to L1 destroy GPRs! 
*/ +static inline void rdmsr_fs_base(void) +{ + __asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : : + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15"); +} +static inline void rdmsr_gs_base(void) +{ + __asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : : + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15"); +} + void l2_guest_code(void) { GUEST_SYNC(7); @@ -41,6 +58,15 @@ void l2_guest_code(void) /* Forced exit to L1 upon restore */ GUEST_SYNC(9); + vmcall(); + + /* MSR-Bitmap tests */ + rdmsr_fs_base(); /* intercepted */ + rdmsr_fs_base(); /* intercepted */ + rdmsr_gs_base(); /* not intercepted */ + vmcall(); + rdmsr_gs_base(); /* intercepted */ + /* Done, exit to L1 and never come back. */ vmcall(); } @@ -76,8 +102,9 @@ void guest_code(struct vmx_pages *vmx_pages) current_evmcs->revision_id = EVMCS_VERSION; GUEST_SYNC(6); - current_evmcs->pin_based_vm_exec_control |= - PIN_BASED_NMI_EXITING; + vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | + PIN_BASED_NMI_EXITING); + GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); @@ -91,6 +118,39 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_SYNC(10); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + current_evmcs->guest_rip += 3; /* vmcall */ + + /* Intercept RDMSR 0xc0000100 */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_USE_MSR_BITMAPS); + set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + /* Enable enlightened MSR bitmap */ + current_evmcs->hv_enlightenments_control.msr_bitmap = 1; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + /* Intercept RDMSR 
0xc0000101 without telling KVM about it */ + set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); + /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ + current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; + GUEST_ASSERT(!vmresume()); + /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + current_evmcs->guest_rip += 3; /* vmcall */ + + /* Now tell KVM we've changed MSR-Bitmap */ + current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); + current_evmcs->guest_rip += 2; /* rdmsr */ + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 7e2d2d17d2ed..8c245ab2d98a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -49,16 +49,13 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, bool evmcs_expected) { int i; - int nent = 9; + int nent_expected = 10; u32 test_val; - if (evmcs_expected) - nent += 1; /* 0x4000000A */ - - TEST_ASSERT(hv_cpuid_entries->nent == nent, + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" - " with evmcs=%d (returned %d)", - nent, evmcs_expected, hv_cpuid_entries->nent); + " (returned %d)", + nent_expected, hv_cpuid_entries->nent); for (i = 0; i < hv_cpuid_entries->nent; i++) { struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; @@ -68,9 +65,6 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, "function %x is our of supported range", entry->function); - TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A), - 
"0x4000000A leaf should not be reported"); - TEST_ASSERT(entry->index == 0, ".index field should be zero"); @@ -97,8 +91,20 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); break; - } + case 0x4000000A: + TEST_ASSERT(entry->eax & (1UL << 19), + "Enlightened MSR-Bitmap should always be supported" + " 0x40000000.EAX: %x", entry->eax); + if (evmcs_expected) + TEST_ASSERT((entry->eax & 0xffff) == 0x101, + "Supported Enlightened VMCS version range is supposed to be 1:1" + " 0x40000000.EAX: %x", entry->eax); + + break; + default: + break; + } /* * If needed for debug: * fprintf(stdout, @@ -107,7 +113,6 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } - } void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c new file mode 100644 index 000000000000..21f5ca9197da --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM_GET/SET_* tests + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Tests for Hyper-V extensions to SVM. 
+ */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <linux/bitmap.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "hyperv.h" + +#define VCPU_ID 1 +#define L2_GUEST_STACK_SIZE 256 + +struct hv_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31) + +static inline void vmmcall(void) +{ + __asm__ __volatile__("vmmcall"); +} + +void l2_guest_code(void) +{ + GUEST_SYNC(3); + /* Exit to L1 */ + vmmcall(); + + /* MSR-Bitmap tests */ + rdmsr(MSR_FS_BASE); /* intercepted */ + rdmsr(MSR_FS_BASE); /* intercepted */ + rdmsr(MSR_GS_BASE); /* not intercepted */ + vmmcall(); + rdmsr(MSR_GS_BASE); /* intercepted */ + + GUEST_SYNC(5); + + /* Done, exit to L1 and never come back. */ + vmmcall(); +} + +static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + struct hv_enlightenments *hve = + (struct hv_enlightenments *)vmcb->control.reserved_sw; + + GUEST_SYNC(1); + + wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + + GUEST_ASSERT(svm->vmcb_gpa); + /* Prepare for L2 execution. 
*/ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(2); + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(4); + vmcb->save.rip += 3; + + /* Intercept RDMSR 0xc0000100 */ + vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT; + set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800); + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + /* Enable enlightened MSR bitmap */ + hve->hv_enlightenments_control.msr_bitmap = 1; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + /* Intercept RDMSR 0xc0000101 without telling KVM about it */ + set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800); + /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ + vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS; + run_guest(vmcb, svm->vmcb_gpa); + /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */ + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + vmcb->save.rip += 3; /* vmcall */ + + /* Now tell KVM we've changed MSR-Bitmap */ + vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS; + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR); + vmcb->save.rip += 2; /* rdmsr */ + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_SYNC(6); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_gva = 0; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + int stage; + + if (!nested_svm_supported()) { + print_skip("Nested SVM not supported"); + exit(KSFT_SKIP); + } + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_hv_cpuid(vm, VCPU_ID); + run = vcpu_state(vm, VCPU_ID); + vcpu_alloc_svm(vm, &nested_gva); + vcpu_args_set(vm, VCPU_ID, 1, 
nested_gva); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index c715adcbd487..0d06ffa95d9d 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -326,6 +326,37 @@ static void test_not_member_allow_list(struct kvm_vm *vm) } /* + * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU. + * + * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs. 
+ */ +static void test_pmu_config_disable(void (*guest_code)(void)) +{ + int r; + struct kvm_vm *vm; + struct kvm_enable_cap cap = { 0 }; + + r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); + if (!(r & KVM_PMU_CAP_DISABLE)) + return; + + vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES); + + cap.cap = KVM_CAP_PMU_CAPABILITY; + cap.args[0] = KVM_PMU_CAP_DISABLE; + TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE."); + + vm_vcpu_add_default(vm, VCPU_ID, guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + TEST_ASSERT(!sanity_check_pmu(vm), + "Guest should not be able to use disabled PMU."); + + kvm_vm_free(vm); +} + +/* * Check for a non-zero PMU version, at least one general-purpose * counter per logical processor, an EBX bit vector of length greater * than 5, and EBX[5] clear. @@ -430,5 +461,7 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); + test_pmu_config_disable(guest_code); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index 80056bbbb003..d1dc1acf997c 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -21,6 +21,8 @@ #define NR_LOCK_TESTING_THREADS 3 #define NR_LOCK_TESTING_ITERATIONS 10000 +bool have_sev_es; + static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error) { struct kvm_sev_cmd cmd = { @@ -172,10 +174,18 @@ static void test_sev_migrate_parameters(void) *sev_es_vm_no_vmsa; int ret; - sev_vm = sev_vm_create(/* es= */ false); - sev_es_vm = sev_vm_create(/* es= */ true); vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); vm_no_sev = aux_vm_create(true); + ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Migrations require SEV enabled. 
ret %d, errno: %d\n", ret, + errno); + + if (!have_sev_es) + goto out; + + sev_vm = sev_vm_create(/* es= */ false); + sev_es_vm = sev_vm_create(/* es= */ true); sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); vm_vcpu_add(sev_es_vm_no_vmsa, 1); @@ -204,14 +214,10 @@ static void test_sev_migrate_parameters(void) "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", ret, errno); - ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd); - TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d\n", ret, - errno); - kvm_vm_free(sev_vm); kvm_vm_free(sev_es_vm); kvm_vm_free(sev_es_vm_no_vmsa); +out: kvm_vm_free(vm_no_vcpu); kvm_vm_free(vm_no_sev); } @@ -300,7 +306,6 @@ static void test_sev_mirror_parameters(void) int ret; sev_vm = sev_vm_create(/* es= */ false); - sev_es_vm = sev_vm_create(/* es= */ true); vm_with_vcpu = aux_vm_create(true); vm_no_vcpu = aux_vm_create(false); @@ -310,6 +315,21 @@ static void test_sev_mirror_parameters(void) "Should not be able copy context to self. ret: %d, errno: %d\n", ret, errno); + ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + errno); + + ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + ret, errno); + + if (!have_sev_es) + goto out; + + sev_es_vm = sev_vm_create(/* es= */ true); ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd); TEST_ASSERT( ret == -1 && errno == EINVAL, @@ -322,63 +342,97 @@ static void test_sev_mirror_parameters(void) "Should not be able copy context to SEV-ES enabled VM. 
ret: %d, errno: %d\n", ret, errno); - ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); - TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, - errno); - - ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); - TEST_ASSERT( - ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", - ret, errno); + kvm_vm_free(sev_es_vm); +out: kvm_vm_free(sev_vm); - kvm_vm_free(sev_es_vm); kvm_vm_free(vm_with_vcpu); kvm_vm_free(vm_no_vcpu); } static void test_sev_move_copy(void) { - struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm; - int ret; + struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm, + *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm; sev_vm = sev_vm_create(/* es= */ false); dst_vm = aux_vm_create(true); + dst2_vm = aux_vm_create(true); + dst3_vm = aux_vm_create(true); mirror_vm = aux_vm_create(false); dst_mirror_vm = aux_vm_create(false); + dst2_mirror_vm = aux_vm_create(false); + dst3_mirror_vm = aux_vm_create(false); sev_mirror_create(mirror_vm->fd, sev_vm->fd); - ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); - TEST_ASSERT(ret == -1 && errno == EBUSY, - "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, - errno); - /* The mirror itself can be migrated. */ sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); - ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); - TEST_ASSERT(ret == -1 && errno == EBUSY, - "Cannot migrate VM that has mirrors. 
ret %d, errno: %d\n", ret, - errno); + sev_migrate_from(dst_vm->fd, sev_vm->fd); + + sev_migrate_from(dst2_vm->fd, dst_vm->fd); + sev_migrate_from(dst2_mirror_vm->fd, dst_mirror_vm->fd); + + sev_migrate_from(dst3_mirror_vm->fd, dst2_mirror_vm->fd); + sev_migrate_from(dst3_vm->fd, dst2_vm->fd); + + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); + kvm_vm_free(dst2_vm); + kvm_vm_free(dst3_vm); + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_mirror_vm); + kvm_vm_free(dst2_mirror_vm); + kvm_vm_free(dst3_mirror_vm); /* - * mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus, - * the owner can be copied as soon as dst_mirror_vm is gone. + * Run similar test be destroy mirrors before mirrored VMs to ensure + * destruction is done safely. */ - kvm_vm_free(dst_mirror_vm); + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm->fd, sev_vm->fd); + + sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); sev_migrate_from(dst_vm->fd, sev_vm->fd); kvm_vm_free(mirror_vm); + kvm_vm_free(dst_mirror_vm); kvm_vm_free(dst_vm); kvm_vm_free(sev_vm); } +#define X86_FEATURE_SEV (1 << 1) +#define X86_FEATURE_SEV_ES (1 << 3) + int main(int argc, char *argv[]) { + struct kvm_cpuid_entry2 *cpuid; + + if (!kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM) && + !kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + print_skip("Capabilities not available"); + exit(KSFT_SKIP); + } + + cpuid = kvm_get_supported_cpuid_entry(0x80000000); + if (cpuid->eax < 0x8000001f) { + print_skip("AMD memory encryption not available"); + exit(KSFT_SKIP); + } + cpuid = kvm_get_supported_cpuid_entry(0x8000001f); + if (!(cpuid->eax & X86_FEATURE_SEV)) { + print_skip("AMD SEV not available"); + exit(KSFT_SKIP); + } + have_sev_es = !!(cpuid->eax & X86_FEATURE_SEV_ES); + if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { test_sev_migrate_from(/* es= */ false); - test_sev_migrate_from(/* es= */ 
true); + if (have_sev_es) + test_sev_migrate_from(/* es= */ true); test_sev_migrate_locking(); test_sev_migrate_parameters(); if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) @@ -386,7 +440,8 @@ int main(int argc, char *argv[]) } if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { test_sev_mirror(/* es= */ false); - test_sev_mirror(/* es= */ true); + if (have_sev_es) + test_sev_mirror(/* es= */ true); test_sev_mirror_parameters(); } return 0; diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c new file mode 100644 index 000000000000..0792334ba243 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +struct kvm_vcpu { + uint32_t id; + bool is_x2apic; +}; + +static void xapic_guest_code(void) +{ + asm volatile("cli"); + + xapic_enable(); + + while (1) { + uint64_t val = (u64)xapic_read_reg(APIC_IRR) | + (u64)xapic_read_reg(APIC_IRR + 0x10) << 32; + + xapic_write_reg(APIC_ICR2, val >> 32); + xapic_write_reg(APIC_ICR, val); + GUEST_SYNC(val); + } +} + +static void x2apic_guest_code(void) +{ + asm volatile("cli"); + + x2apic_enable(); + + do { + uint64_t val = x2apic_read_reg(APIC_IRR) | + x2apic_read_reg(APIC_IRR + 0x10) << 32; + + x2apic_write_reg(APIC_ICR, val); + GUEST_SYNC(val); + } while (1); +} + +static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val) +{ + struct kvm_lapic_state xapic; + struct ucall uc; + uint64_t icr; + + /* + * Tell the guest what ICR value to write. Use the IRR to pass info, + * all bits are valid and should not be modified by KVM (ignoring the + * fact that vectors 0-15 are technically illegal). 
+ */ + vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic); + *((u32 *)&xapic.regs[APIC_IRR]) = val; + *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32; + vcpu_ioctl(vm, vcpu->id, KVM_SET_LAPIC, &xapic); + + vcpu_run(vm, vcpu->id); + ASSERT_EQ(get_ucall(vm, vcpu->id, &uc), UCALL_SYNC); + ASSERT_EQ(uc.args[1], val); + + vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic); + icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | + (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; + if (!vcpu->is_x2apic) + val &= (-1u | (0xffull << (32 + 24))); + ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} + +static void __test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val) +{ + ____test_icr(vm, vcpu, val | APIC_ICR_BUSY); + ____test_icr(vm, vcpu, val & ~(u64)APIC_ICR_BUSY); +} + +static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + uint64_t icr, i, j; + + icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; + for (i = 0; i <= 0xff; i++) + __test_icr(vm, vcpu, icr | i); + + icr = APIC_INT_ASSERT | APIC_DM_FIXED; + for (i = 0; i <= 0xff; i++) + __test_icr(vm, vcpu, icr | i); + + /* + * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of + * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + */ + icr = APIC_INT_ASSERT | 0xff; + for (i = vcpu->id + 1; i < 0xff; i++) { + for (j = 0; j < 8; j++) + __test_icr(vm, vcpu, i << (32 + 24) | APIC_INT_ASSERT | (j << 8)); + } + + /* And again with a shorthand destination for all types of IPIs. */ + icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT; + for (i = 0; i < 8; i++) + __test_icr(vm, vcpu, icr | (i << 8)); + + /* And a few garbage value, just make sure it's an IRQ (blocked). 
*/ + __test_icr(vm, vcpu, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK); + __test_icr(vm, vcpu, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK); + __test_icr(vm, vcpu, -1ull & ~APIC_DM_FIXED_MASK); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu vcpu = { + .id = 0, + .is_x2apic = true, + }; + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + int i; + + vm = vm_create_default(vcpu.id, 0, x2apic_guest_code); + test_icr(vm, &vcpu); + kvm_vm_free(vm); + + /* + * Use a second VM for the xAPIC test so that x2APIC can be hidden from + * the guest in order to test AVIC. KVM disallows changing CPUID after + * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. + */ + vm = vm_create_default(vcpu.id, 0, xapic_guest_code); + vcpu.is_x2apic = false; + + cpuid = vcpu_get_cpuid(vm, vcpu.id); + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == 1) + break; + } + cpuid->entries[i].ecx &= ~BIT(21); + vcpu_set_cpuid(vm, vcpu.id, cpuid); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + test_icr(vm, &vcpu); + kvm_vm_free(vm); +} |