From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:10 +0100 Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL User space needs a mechanism to perform key checked accesses when emulating instructions. The key can be passed as an additional argument. Having an additional argument is flexible, as user space can pass the guest PSW's key, in order to make an access the same way the CPU would, or pass another key if necessary. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..4566f429db2c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -562,7 +562,10 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* should be set to 0 */ }; @@ -575,6 +578,7 @@ struct kvm_s390_mem_op { /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) /* for KVM_INTERRUPT */ struct kvm_interrupt { -- cgit v1.2.3 From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:11 +0100 Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access Channel I/O honors storage keys and is performed on absolute memory. For I/O emulation user space therefore needs to be able to do key checked accesses. The vm IOCTL supports read/write accesses, as well as checking if an access would succeed. Unlike relying on KVM_S390_GET_SKEYS for key checking would, the vm IOCTL performs the check in lockstep with the read or write, by, ultimately, mapping the access to move instructions that support key protection checking with a supplied key. Fetch and storage protection override are not applicable to absolute accesses and so are not applied as they are when using the vcpu memop. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 72 ++++++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/gaccess.h | 6 ++++ arch/s390/kvm/kvm-s390.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 ++ 4 files changed, 161 insertions(+) (limited to 'include/uapi') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 37838f637707..d53a183c2005 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -795,6 +795,35 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } +static int vm_check_access_key(struct kvm *kvm, u8 access_key, + enum gacc_mode mode, gpa_t gpa) +{ + u8 storage_key, access_control; + bool fetch_protected; + unsigned long hva; + int r; + + if (access_key == 0) + return 0; + + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + + mmap_read_lock(current->mm); + r = get_guest_storage_key(current->mm, hva, &storage_key); + mmap_read_unlock(current->mm); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + if (access_control == access_key) + return 0; + fetch_protected = storage_key & _PAGE_FP_BIT; + if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) + return 0; + return PGM_PROTECTION; +} + static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, union asce asce) { @@ -994,6 +1023,26 @@ access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, return 0; } +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key) +{ + int offset = offset_in_page(gpa); + int fragment_len; + int rc; + + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); + rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key); + if (rc) + return rc; + offset = 0; + len -= fragment_len; + data += fragment_len; + gpa += fragment_len; + } + return 0; +} + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode, u8 access_key) @@ -1144,6 +1193,29 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, return rc; } +/** + * check_gpa_range - test a range of guest physical addresses for accessibility + * @kvm: virtual machine instance + * @gpa: guest physical address + * @length: length of test range + * @mode: access mode to test, relevant for storage keys + * @access_key: access key to mach the storage keys with + */ +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key) +{ + unsigned int fragment_len; + int rc = 0; + + while (length && !rc) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length); + rc = vm_check_access_key(kvm, access_key, mode, gpa); + length -= fragment_len; + gpa += fragment_len; + } + return rc; +} + /** * kvm_s390_check_low_addr_prot_real - check for low-address protection * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index c5f2e7311b17..1124ff282012 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -193,6 +193,12 @@ int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode, u8 access_key); +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key); + +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key); + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode, u8 access_key); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c31b40abfa23..36bc73b5f5de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2364,6 +2364,78 @@ static bool access_key_invalid(u8 access_key) return access_key > 0xf; } +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + u64 supported_flags; + void *tmpbuf = NULL; + int r, srcu_idx; + + supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + if (mop->flags & ~supported_flags) + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + if (kvm_s390_pv_is_protected(kvm)) + return -EINVAL; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) + return -EINVAL; + } else { + mop->key = 0; + } + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) + return -ENOMEM; + } + + srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_is_error_gpa(kvm, mop->gaddr)) { + r = PGM_ADDRESSING; + goto out_unlock; + } + + switch (mop->op) { + case KVM_S390_MEMOP_ABSOLUTE_READ: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); + } else { + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_FETCH, mop->key); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } + } + break; + } + case KVM_S390_MEMOP_ABSOLUTE_WRITE: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else { + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_STORE, mop->key); + } + break; + } + default: + r = -EINVAL; + } + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + vfree(tmpbuf); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2488,6 +2560,15 @@ long kvm_arch_vm_ioctl(struct file *filp, } break; } + case KVM_S390_MEM_OP: { + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) + r = kvm_s390_vm_mem_op(kvm, &mem_op); + else + r = -EFAULT; + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4566f429db2c..4bc7623def87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -575,6 +575,8 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -- cgit v1.2.3 From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:13 +0100 Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: * The vcpu MEM_OP IOCTL supports storage key checking. * The vm MEM_OP IOCTL exists. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + include/uapi/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 773bccdd446c..c2c26c2aad64 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -564,6 +564,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: + case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4bc7623def87..08756eeea065 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1140,6 +1140,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_S390_MEM_OP_EXTENSION 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:14 +0100 Subject: KVM: s390: Update api documentation for memop ioctl Document all currently existing operations, flags and explain under which circumstances they are available. Document the recently introduced absolute operations and the storage key protection flag, as well as the existing SIDA operations. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/api.rst | 112 +++++++++++++++++++++++++++++++++-------- include/uapi/linux/kvm.h | 2 +- 2 files changed, 91 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a4267104db50..48f23bb80d7f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3683,15 +3683,17 @@ The fields in each entry are defined as follows: 4.89 KVM_S390_MEM_OP -------------------- -:Capability: KVM_CAP_S390_MEM_OP +:Capability: KVM_CAP_S390_MEM_OP, KVM_CAP_S390_PROTECTED, KVM_CAP_S390_MEM_OP_EXTENSION :Architectures: s390 -:Type: vcpu ioctl +:Type: vm ioctl, vcpu ioctl :Parameters: struct kvm_s390_mem_op (in) :Returns: = 0 on success, < 0 on generic error (e.g. -EFAULT or -ENOMEM), > 0 if an exception occurred while walking the page tables -Read or write data from/to the logical (virtual) memory of a VCPU. +Read or write data from/to the VM's memory. +The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is +supported. Parameters are specified via the following structure:: @@ -3701,33 +3703,99 @@ Parameters are specified via the following structure:: __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ - __u8 ar; /* the access register number */ - __u8 reserved[31]; /* should be set to 0 */ + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; }; -The type of operation is specified in the "op" field. It is either -KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or -KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The -KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check -whether the corresponding memory access would create an access exception -(without touching the data in the memory at the destination). In case an -access exception occurred while walking the MMU tables of the guest, the -ioctl returns a positive error number to indicate the type of exception. -This exception is also raised directly at the corresponding VCPU if the -flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field. - The start address of the memory region has to be specified in the "gaddr" field, and the length of the region in the "size" field (which must not be 0). The maximum value for "size" can be obtained by checking the KVM_CAP_S390_MEM_OP capability. "buf" is the buffer supplied by the userspace application where the read data should be written to for -KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written is -stored for a KVM_S390_MEMOP_LOGICAL_WRITE. When KVM_S390_MEMOP_F_CHECK_ONLY -is specified, "buf" is unused and can be NULL. "ar" designates the access -register number to be used; the valid range is 0..15. +a read access, or where the data that should be written is stored for +a write access. The "reserved" field is meant for future extensions. +Reserved and unused values are ignored. Future extension that add members must +introduce new flags. + +The type of operation is specified in the "op" field. Flags modifying +their behavior can be set in the "flags" field. Undefined flag bits must +be set to 0. + +Possible operations are: + * ``KVM_S390_MEMOP_LOGICAL_READ`` + * ``KVM_S390_MEMOP_LOGICAL_WRITE`` + * ``KVM_S390_MEMOP_ABSOLUTE_READ`` + * ``KVM_S390_MEMOP_ABSOLUTE_WRITE`` + * ``KVM_S390_MEMOP_SIDA_READ`` + * ``KVM_S390_MEMOP_SIDA_WRITE`` + +Logical read/write: +^^^^^^^^^^^^^^^^^^^ + +Access logical memory, i.e. translate the given guest address to an absolute +address given the state of the VCPU and use the absolute address as target of +the access. "ar" designates the access register number to be used; the valid +range is 0..15. +Logical accesses are permitted for the VCPU ioctl only. +Logical accesses are permitted for non-protected guests only. + +Supported flags: + * ``KVM_S390_MEMOP_F_CHECK_ONLY`` + * ``KVM_S390_MEMOP_F_INJECT_EXCEPTION`` + * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` + +The KVM_S390_MEMOP_F_CHECK_ONLY flag can be set to check whether the +corresponding memory access would cause an access exception; however, +no actual access to the data in memory at the destination is performed. +In this case, "buf" is unused and can be NULL. + +In case an access exception occurred during the access (or would occur +in case of KVM_S390_MEMOP_F_CHECK_ONLY), the ioctl returns a positive +error number indicating the type of exception. This exception is also +raised directly at the corresponding VCPU if the flag +KVM_S390_MEMOP_F_INJECT_EXCEPTION is set. + +If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key +protection is also in effect and may cause exceptions if accesses are +prohibited given the access key passed in "key". +KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION +is > 0. + +Absolute read/write: +^^^^^^^^^^^^^^^^^^^^ + +Access absolute memory. This operation is intended to be used with the +KVM_S390_MEMOP_F_SKEY_PROTECTION flag, to allow accessing memory and performing +the checks required for storage key protection as one operation (as opposed to +user space getting the storage keys, performing the checks, and accessing +memory thereafter, which could lead to a delay between check and access). +Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION +is > 0. +Currently absolute accesses are not permitted for VCPU ioctls. +Absolute accesses are permitted for non-protected guests only. + +Supported flags: + * ``KVM_S390_MEMOP_F_CHECK_ONLY`` + * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` + +The semantics of the flags are as for logical accesses. + +SIDA read/write: +^^^^^^^^^^^^^^^^ + +Access the secure instruction data area which contains memory operands necessary +for instruction emulation for protected guests. +SIDA accesses are available if the KVM_CAP_S390_PROTECTED capability is available. +SIDA accesses are permitted for the VCPU ioctl only. +SIDA accesses are permitted for protected guests only. -The "reserved" field is meant for future extensions. It is not used by -KVM with the currently defined set of flags. +No flags are supported. 4.90 KVM_S390_GET_SKEYS ----------------------- diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 08756eeea065..dbc550bbd9fa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -567,7 +567,7 @@ struct kvm_s390_mem_op { __u8 key; /* access key, ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ -- cgit v1.2.3 From d43583b890e7cb0078d13d056753a56602b92406 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:23 +0000 Subject: KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest PSCI v1.1 introduces the optional SYSTEM_RESET2 call, which allows the caller to provide a vendor-specific "reset type" and "cookie" to request a particular form of reset or shutdown. Expose this call to the guest and handle it in the same way as PSCI SYSTEM_RESET, along with some basic range checking on the type argument. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-3-will@kernel.org --- arch/arm64/kvm/psci.c | 33 +++++++++++++++++++++++++++++---- include/uapi/linux/psci.h | 4 ++++ 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 70d07477e712..30fcc5a99483 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -308,7 +308,7 @@ out: static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) { u32 psci_fn = smccc_get_function(vcpu); - u32 feature; + u32 arg; unsigned long val; int ret = 1; @@ -320,12 +320,12 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1; break; case PSCI_1_0_FN_PSCI_FEATURES: - feature = smccc_get_arg1(vcpu); - val = kvm_psci_check_allowed_function(vcpu, feature); + arg = smccc_get_arg1(vcpu); + val = kvm_psci_check_allowed_function(vcpu, arg); if (val) break; - switch(feature) { + switch(arg) { case PSCI_0_2_FN_PSCI_VERSION: case PSCI_0_2_FN_CPU_SUSPEND: case PSCI_0_2_FN64_CPU_SUSPEND: @@ -341,11 +341,36 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; + case PSCI_1_1_FN_SYSTEM_RESET2: + case PSCI_1_1_FN64_SYSTEM_RESET2: + if (minor >= 1) { + val = 0; + break; + } + fallthrough; default: val = PSCI_RET_NOT_SUPPORTED; break; } break; + case PSCI_1_1_FN_SYSTEM_RESET2: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_1_1_FN64_SYSTEM_RESET2: + if (minor >= 1) { + arg = smccc_get_arg1(vcpu); + + if (arg > PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET && + arg < PSCI_1_1_RESET_TYPE_VENDOR_START) { + val = PSCI_RET_INVALID_PARAMS; + } else { + kvm_psci_system_reset(vcpu); + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + } + break; + }; + fallthrough; default: return kvm_psci_0_2_call(vcpu); } diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2fcad1dd0b0e..2bf93c0d6354 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -82,6 +82,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From ba7bb663f5547ef474c98df99a97bb4a13c5715f Mon Sep 17 00:00:00 2001 From: David Dunn Date: Wed, 23 Feb 2022 22:57:41 +0000 Subject: KVM: x86: Provide per VM capability for disabling PMU virtualization Add a new capability, KVM_CAP_PMU_CAPABILITY, that takes a bitmask of settings/features to allow userspace to configure PMU virtualization on a per-VM basis. For now, support a single flag, KVM_PMU_CAP_DISABLE, to allow disabling PMU virtualization for a VM even when KVM is configured with enable_pmu=true a module level. To keep KVM simple, disallow changing VM's PMU configuration after vCPUs have been created. Signed-off-by: David Dunn Message-Id: <20220223225743.2703915-2-daviddunn@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 22 ++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 2 +- arch/x86/kvm/x86.c | 18 ++++++++++++++++++ include/uapi/linux/kvm.h | 3 +++ tools/include/uapi/linux/kvm.h | 4 ++++ 7 files changed, 50 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c3e68c1531f0..f5d011351016 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7643,3 +7643,25 @@ The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace the hypercalls whose corresponding bit is in the argument, and return ENOSYS for the others. + +8.35 KVM_CAP_PMU_CAPABILITY +--------------------------- + +:Capability KVM_CAP_PMU_CAPABILITY +:Architectures: x86 +:Type: vm +:Parameters: arg[0] is bitmask of PMU virtualization capabilities. +:Returns 0 on success, -EINVAL when arg[0] contains invalid bits + +This capability alters PMU virtualization in KVM. + +Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of +PMU virtualization capabilities that can be adjusted on a VM. + +The argument to KVM_ENABLE_CAP is also a bitmask and selects specific +PMU virtualization capabilities to be applied to the VM. This can +only be invoked on a VM prior to the creation of VCPUs. + +At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting +this capability will disable PMU virtualization for that VM. Usermode +should adjust CPUID leaf 0xA to reflect that the PMU is disabled. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 463171e3b613..dafb5a6220cd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1147,6 +1147,7 @@ struct kvm_arch { bool exception_payload_enabled; bool bus_lock_detection_enabled; + bool enable_pmu; /* * If exit_on_emulation_error is set, and the in-kernel instruction * emulator fails to emulate an instruction, allow userspace diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 5aa45f13b16d..d4de52409335 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - if (!enable_pmu) + if (!vcpu->kvm->arch.enable_pmu) return NULL; switch (msr) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 03fab48b149c..4e5b1eeeb77c 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -487,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->reserved_bits = 0xffffffff00200000ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); - if (!entry || !enable_pmu) + if (!entry || !vcpu->kvm->arch.enable_pmu) return; eax.full = entry->eax; edx.full = entry->edx; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 16d29d41908f..0b95c379e234 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -110,6 +110,8 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE + #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -4330,6 +4332,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) if (r < sizeof(struct kvm_xsave)) r = sizeof(struct kvm_xsave); break; + case KVM_CAP_PMU_CAPABILITY: + r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0; + break; } default: break; @@ -6004,6 +6009,18 @@ split_irqchip_unlock: kvm->arch.exit_on_emulation_error = cap->args[0]; r = 0; break; + case KVM_CAP_PMU_CAPABILITY: + r = -EINVAL; + if (!enable_pmu || (cap->args[0] & ~KVM_CAP_PMU_VALID_MASK)) + break; + + mutex_lock(&kvm->lock); + if (!kvm->created_vcpus) { + kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE); + r = 0; + } + mutex_unlock(&kvm->lock); + break; default: r = -EINVAL; break; @@ -11586,6 +11603,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); kvm->arch.guest_can_read_msr_platform_info = true; + kvm->arch.enable_pmu = enable_pmu; #if IS_ENABLED(CONFIG_HYPERV) spin_lock_init(&kvm->arch.hv_root_tdp_lock); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a02bbf8fd0f6..d2f1efc3aa35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1142,6 +1142,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1978,6 +1979,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 507ee1f2aa96..bbc6b7c2dc1b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1135,6 +1135,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 +#define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1971,6 +1973,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. -- cgit v1.2.3 From 6d8491910fcd3324d0f0ece3bd68e85ead3a04d7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 1 Mar 2022 06:03:47 +0000 Subject: KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 KVM_CAP_DISABLE_QUIRKS is irrevocably broken. The capability does not advertise the set of quirks which may be disabled to userspace, so it is impossible to predict the behavior of KVM. Worse yet, KVM_CAP_DISABLE_QUIRKS will tolerate any value for cap->args[0], meaning it fails to reject attempts to set invalid quirk bits. The only valid workaround for the quirky quirks API is to add a new CAP. Actually advertise the set of quirks that can be disabled to userspace so it can predict KVM's behavior. Reject values for cap->args[0] that contain invalid bits. Finally, add documentation for the new capability and describe the existing quirks. Signed-off-by: Oliver Upton Message-Id: <20220301060351.442881-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 50 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 7 ++++++ arch/x86/kvm/x86.c | 8 +++++++ include/uapi/linux/kvm.h | 1 + 4 files changed, 66 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 55efa82e37eb..07a45474abe9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7083,6 +7083,56 @@ resource that is controlled with the H_SET_MODE hypercall. This capability allows a guest kernel to use a better-performance mode for handling interrupts and system calls. +7.31 KVM_CAP_DISABLE_QUIRKS2 +---------------------------- + +:Capability: KVM_CAP_DISABLE_QUIRKS2 +:Parameters: args[0] - set of KVM quirks to disable +:Architectures: x86 +:Type: vm + +This capability, if enabled, will cause KVM to disable some behavior +quirks. + +Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of +quirks that can be disabled in KVM. + +The argument to KVM_ENABLE_CAP for this capability is a bitmask of +quirks to disable, and must be a subset of the bitmask returned by +KVM_CHECK_EXTENSION. + +The valid bits in cap.args[0] are: + +=================================== ============================================ + KVM_X86_QUIRK_LINT0_REENABLED By default, the reset value for the LVT + LINT0 register is 0x700 (APIC_MODE_EXTINT). + When this quirk is disabled, the reset value + is 0x10000 (APIC_LVT_MASKED). + + KVM_X86_QUIRK_CD_NW_CLEARED By default, KVM clears CR0.CD and CR0.NW. + When this quirk is disabled, KVM does not + change the value of CR0.CD and CR0.NW. + + KVM_X86_QUIRK_LAPIC_MMIO_HOLE By default, the MMIO LAPIC interface is + available even when configured for x2APIC + mode. When this quirk is disabled, KVM + disables the MMIO LAPIC interface if the + LAPIC is in x2APIC mode. + + KVM_X86_QUIRK_OUT_7E_INC_RIP By default, KVM pre-increments %rip before + exiting to userspace for an OUT instruction + to port 0x7e. When this quirk is disabled, + KVM does not pre-increment %rip before + exiting to userspace. + + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT When this quirk is disabled, KVM sets + CPUID.01H:ECX[bit 3] (MONITOR/MWAIT) if + IA32_MISC_ENABLE[bit 18] (MWAIT) is set. + Additionally, when this quirk is disabled, + KVM clears CPUID.01H:ECX[bit 3] if + IA32_MISC_ENABLE[bit 18] is cleared. +=================================== ============================================ + 8. Other capabilities. ====================== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a2c855f04e3..0ddc2e67a731 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1966,4 +1966,11 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) +#define KVM_X86_VALID_QUIRKS \ + (KVM_X86_QUIRK_LINT0_REENABLED | \ + KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ + KVM_X86_QUIRK_OUT_7E_INC_RIP | \ + KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c58265d9f1b2..fe2171b11441 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4363,6 +4363,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0; break; } + case KVM_CAP_DISABLE_QUIRKS2: + r = KVM_X86_VALID_QUIRKS; + break; default: break; } @@ -5909,6 +5912,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; switch (cap->cap) { + case KVM_CAP_DISABLE_QUIRKS2: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_VALID_QUIRKS) + break; + fallthrough; case KVM_CAP_DISABLE_QUIRKS: kvm->arch.disabled_quirks = cap->args[0]; r = 0; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d2f1efc3aa35..91a6fe4e02c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1143,6 +1143,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3