diff options
author | Linus Torvalds | 2020-03-30 13:17:50 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-03-30 13:17:50 -0700 |
commit | ff7b862a4c354793580545afa64c56fafa18952b (patch) | |
tree | 6daa5fdbfa5fe1a0327a1030249a26c02929bcf1 /arch | |
parent | aaf985e21a4abb471df2a10ad7163367cbcd4088 (diff) | |
parent | 077168e241ec5a3b273652acb1e85f8bc1dc2d81 (diff) |
Merge tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:
- Do not report spurious corrected MCEs caused by errata on some Intel
platforms, by Prarit Bhargava.
- Change dev-mcelog's hardcoded limit of 32 error records to a dynamic
one, controlled by the number of logical CPUs, by Tony Luck.
- Add support for the Protected Processor Inventory Number (PPIN) on AMD,
by Wei Huang.
* tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce/amd: Add PPIN support for AMD MCE
x86/mce/dev-mcelog: Dynamically allocate space for machine check records
x86/mce: Do not log spurious corrected mce errors
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/dev-mcelog.c | 47 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/intel.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/internal.h | 2 |
7 files changed, 84 insertions, 23 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f3327cb56edf..4b263ffb793b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,6 +299,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4359b955e0b7..9d5b09913ef3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -102,7 +102,7 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -#define MCE_LOG_LEN 32 +#define MCE_LOG_MIN_LEN 32U #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ @@ -135,11 +135,11 @@ */ struct mce_log_buffer { char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ + unsigned len; /* = elements in .mce_entry[] */ unsigned next; unsigned flags; unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; + struct mce entry[]; }; enum mce_notifier_prios { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1f875fbe1384..aef06c37d338 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; } +static void amd_detect_ppin(struct cpuinfo_x86 *c) +{ + unsigned long long val; + + if (!cpu_has(c, 
X86_FEATURE_AMD_PPIN)) + return; + + /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */ + if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val)) + goto clear_ppin; + + /* PPIN is locked in disabled mode, clear feature bit */ + if ((val & 3UL) == 1UL) + goto clear_ppin; + + /* If PPIN is disabled, try to enable it */ + if (!(val & 2UL)) { + wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL); + rdmsrl_safe(MSR_AMD_PPIN_CTL, &val); + } + + /* If PPIN_EN bit is 1, return from here; otherwise fall through */ + if (val & 2UL) + return; + +clear_ppin: + clear_cpu_cap(c, X86_FEATURE_AMD_PPIN); +} + u16 amd_get_nb_id(int cpu) { return per_cpu(cpu_llc_id, cpu); @@ -941,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c) amd_detect_cmp(c); amd_get_topology(c); srat_detect_node(c); + amd_detect_ppin(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2c4f949611e4..dd06fce537fc 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -142,6 +142,8 @@ void mce_setup(struct mce *m) if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) rdmsrl(MSR_PPIN, m->ppin); + else if (this_cpu_has(X86_FEATURE_AMD_PPIN)) + rdmsrl(MSR_AMD_PPIN, m->ppin); m->microcode = boot_cpu_data.microcode; } @@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m) { if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return amd_filter_mce(m); + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return intel_filter_mce(m); return false; } diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index 7c8958dee103..d089567a9ce8 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL }; * separate MCEs from kernel messages to avoid bogus bug reports. 
*/ -static struct mce_log_buffer mcelog = { - .signature = MCE_LOG_SIGNATURE, - .len = MCE_LOG_LEN, - .recordlen = sizeof(struct mce), -}; +static struct mce_log_buffer *mcelog; static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); @@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, mutex_lock(&mce_chrdev_read_mutex); - entry = mcelog.next; + entry = mcelog->next; /* * When the buffer fills up discard new entries. Assume that the * earlier errors are the more interesting ones: */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + if (entry >= mcelog->len) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags); goto unlock; } - mcelog.next = entry + 1; + mcelog->next = entry + 1; - memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - mcelog.entry[entry].finished = 1; + memcpy(mcelog->entry + entry, mce, sizeof(struct mce)); + mcelog->entry[entry].finished = 1; /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); @@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, /* Only supports full reads right now */ err = -EINVAL; - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) + if (*off != 0 || usize < mcelog->len * sizeof(struct mce)) goto out; - next = mcelog.next; + next = mcelog->next; err = 0; for (i = 0; i < next; i++) { - struct mce *m = &mcelog.entry[i]; + struct mce *m = &mcelog->entry[i]; err |= copy_to_user(buf, m, sizeof(*m)); buf += sizeof(*m); } - memset(mcelog.entry, 0, next * sizeof(struct mce)); - mcelog.next = 0; + memset(mcelog->entry, 0, next * sizeof(struct mce)); + mcelog->next = 0; if (err) err = -EFAULT; @@ -242,7 +238,7 @@ out: static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_chrdev_wait, wait); - if (READ_ONCE(mcelog.next)) + if (READ_ONCE(mcelog->next)) return EPOLLIN | EPOLLRDNORM; if (!mce_apei_read_done && apei_check_mce()) return EPOLLIN | 
EPOLLRDNORM; @@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, case MCE_GET_RECORD_LEN: return put_user(sizeof(struct mce), p); case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); + return put_user(mcelog->len, p); case MCE_GETCLEAR_FLAGS: { unsigned flags; do { - flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + flags = mcelog->flags; + } while (cmpxchg(&mcelog->flags, flags, 0) != flags); return put_user(flags, p); } @@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = { static __init int dev_mcelog_init_device(void) { + int mce_log_len; int err; + mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus()); + mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL); + if (!mcelog) + return -ENOMEM; + + strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature)); + mcelog->len = mce_log_len; + mcelog->recordlen = sizeof(struct mce); + /* register character device /dev/mcelog */ err = misc_register(&mce_chrdev_device); if (err) { @@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void) else pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err); + kfree(mcelog); return err; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index f996ffb887bc..d8f9230d2034 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -521,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); } + +bool intel_filter_mce(struct mce *m) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */ + if ((c->x86 == 6) && + ((c->x86_model == INTEL_FAM6_HASWELL) || + (c->x86_model == INTEL_FAM6_HASWELL_L) || + (c->x86_model == INTEL_FAM6_BROADWELL) || + (c->x86_model == INTEL_FAM6_HASWELL_G)) && + (m->bank == 0) && + ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) + return true; + + return false; +} diff --git 
a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index b785c0d0b590..97db18441d2c 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -48,6 +48,7 @@ void cmci_disable_bank(int bank); void intel_init_cmci(void); void intel_init_lmce(void); void intel_clear_lmce(void); +bool intel_filter_mce(struct mce *m); #else # define cmci_intel_adjust_timer mce_adjust_timer_default static inline bool mce_intel_cmci_poll(void) { return false; } @@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { } static inline void intel_init_cmci(void) { } static inline void intel_init_lmce(void) { } static inline void intel_clear_lmce(void) { } +static inline bool intel_filter_mce(struct mce *m) { return false; }; #endif void mce_timer_kick(unsigned long interval); |