aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorJakub Kicinski2024-03-07 10:19:05 -0800
committerJakub Kicinski2024-03-07 10:29:36 -0800
commite3afe5dd3abea43e056a004df02fb49f24cc5ad7 (patch)
treea8f38510fe1aeee32a54112fdee27e1948cde35e /arch/x86
parenta148f82c457f957cdc9445101882c225aa8c3ff0 (diff)
parentdf4793505abd5df399bc6d9a4d8fe81761f557cd (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. No conflicts. Adjacent changes: net/core/page_pool_user.c 0b11b1c5c320 ("netdev: let netlink core handle -EMSGSIZE errors") 429679dcf7d9 ("page_pool: fix netlink dump stop/resume") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/hyperv/hv_vtl.c7
-rw-r--r--arch/x86/hyperv/ivm.c65
-rw-r--r--arch/x86/include/asm/set_memory.h1
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c178
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/mm/pat/set_memory.c24
7 files changed, 180 insertions, 107 deletions
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 96e6c51515f5..cf1b78cb2d04 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -16,6 +16,11 @@
extern struct boot_params boot_params;
static struct real_mode_header hv_vtl_real_mode_header;
+static bool __init hv_vtl_msi_ext_dest_id(void)
+{
+ return true;
+}
+
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -38,6 +43,8 @@ void __init hv_vtl_init_platform(void)
x86_platform.legacy.warm_reset = 0;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 0;
+
+ x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id;
}
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 7dcbf153ad72..768d73de0d09 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -15,6 +15,7 @@
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
+#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
@@ -503,6 +504,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
}
/*
+ * When transitioning memory between encrypted and decrypted, the caller
+ * of set_memory_encrypted() or set_memory_decrypted() is responsible for
+ * ensuring that the memory isn't in use and isn't referenced while the
+ * transition is in progress. The transition has multiple steps, and the
+ * memory is in an inconsistent state until all steps are complete. A
+ * reference while the state is inconsistent could result in an exception
+ * that can't be cleanly fixed up.
+ *
+ * But the Linux kernel load_unaligned_zeropad() mechanism could cause a
+ * stray reference that can't be prevented by the caller, so Linux has
+ * specific code to handle this case. But when the #VC and #VE exceptions
+ * routed to a paravisor, the specific code doesn't work. To avoid this
+ * problem, mark the pages as "not present" while the transition is in
+ * progress. If load_unaligned_zeropad() causes a stray reference, a normal
+ * page fault is generated instead of #VC or #VE, and the page-fault-based
+ * handlers for load_unaligned_zeropad() resolve the reference. When the
+ * transition is complete, hv_vtom_set_host_visibility() marks the pages
+ * as "present" again.
+ */
+static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
+{
+ return !set_memory_np(kbuffer, pagecount);
+}
+
+/*
* hv_vtom_set_host_visibility - Set specified memory visible to host.
*
* In Isolation VM, all guest memory is encrypted from host and guest
@@ -515,16 +541,28 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
enum hv_mem_host_visibility visibility = enc ?
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
u64 *pfn_array;
+ phys_addr_t paddr;
+ void *vaddr;
int ret = 0;
bool result = true;
int i, pfn;
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
- if (!pfn_array)
- return false;
+ if (!pfn_array) {
+ result = false;
+ goto err_set_memory_p;
+ }
for (i = 0, pfn = 0; i < pagecount; i++) {
- pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE);
+ /*
+ * Use slow_virt_to_phys() because the PRESENT bit has been
+ * temporarily cleared in the PTEs. slow_virt_to_phys() works
+ * without the PRESENT bit while virt_to_hvpfn() or similar
+ * does not.
+ */
+ vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
+ paddr = slow_virt_to_phys(vaddr);
+ pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
pfn++;
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
@@ -538,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
}
}
- err_free_pfn_array:
+err_free_pfn_array:
kfree(pfn_array);
+
+err_set_memory_p:
+ /*
+ * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
+ * did. Do this even if there is an error earlier in this function in
+ * order to avoid leaving the memory range in a "broken" state. Setting
+ * the PRESENT bits shouldn't fail, but return an error if it does.
+ */
+ if (set_memory_p(kbuffer, pagecount))
+ result = false;
+
return result;
}
static bool hv_vtom_tlb_flush_required(bool private)
{
- return true;
+ /*
+ * Since hv_vtom_clear_present() marks the PTEs as "not present"
+ * and flushes the TLB, they can't be in the TLB. That makes the
+ * flush controlled by this function redundant, so return "false".
+ */
+ return false;
}
static bool hv_vtom_cache_flush_required(void)
@@ -608,6 +662,7 @@ void __init hv_vtom_init(void)
x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
+ x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
/* Set WB as the default cache mode. */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index a5e89641bd2d..9aee31862b4a 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
+int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0b97bcde70c6..fbc4e60d027c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
get_cpu_vendor(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
cpu_parse_early_param();
if (this_cpu->c_early_init)
@@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_bsp_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
}
- get_cpu_address_sizes(c);
-
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
cpu_set_bug_bits(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a927a8fc9624..40dec9b56f87 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -322,6 +406,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);
+
+ /*
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -482,90 +573,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -702,9 +709,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);
split_lock_init();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index fb8cf953380d..b66f540de054 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
- * to be reserved.
+ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+ * kexec and do not need to be reserved.
*/
- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+ if (data->type != SETUP_EFI &&
+ data->type != SETUP_IMA &&
+ data->type != SETUP_RNG_SEED)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index e9b448d1b1b7..102880404046 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
* areas on 32-bit NUMA systems. The percpu areas can
* end up in this kind of memory, for instance.
*
- * This could be optimized, but it is only intended to be
- * used at initialization time, and keeping it
- * unoptimized should increase the testing coverage for
- * the more obscure platforms.
+ * Note that as long as the PTEs are well-formed with correct PFNs, this
+ * works without checking the PRESENT bit in the leaf PTE. This is unlike
+ * the similar vmalloc_to_page() and derivatives. Callers may depend on
+ * this behavior.
+ *
+ * This could be optimized, but it is only used in paths that are not perf
+ * sensitive, and keeping it unoptimized should increase the testing coverage
+ * for the more obscure platforms.
*/
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
-static int set_memory_p(unsigned long *addr, int numpages)
-{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-}
-
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
- return set_memory_p(&addr, 1);
+ return set_memory_p(addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
CPA_NO_CHECK_ALIAS, NULL);
}
+int set_memory_p(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),