diff options
author | Linus Torvalds | 2014-01-20 12:10:27 -0800 |
---|---|---|
committer | Linus Torvalds | 2014-01-20 12:10:27 -0800 |
commit | fab5669d556200c4dd119af705bff14085845d1e (patch) | |
tree | 366d05fc858fc599177d687f37b227a4832c2f48 /drivers | |
parent | 74e8ee8262c3f93bbc41804037b43f07b95897bb (diff) | |
parent | b769e014f3ae4af8a56c6327077b3c40410dedad (diff) |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
- SCI reporting for other error types, not only correctable ones
- GHES cleanups
- Add the functionality to override error reporting agents as some
machines are sporting a new extended error logging capability which,
if done properly in the BIOS, makes a corresponding EDAC module
redundant
- PCIe AER tracepoint severity levels fix
- Error path correction for the mce device init
- MCE timer fix
- Add more flexibility to the error injection (EINJ) debugfs interface
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, mce: Fix mce_start_timer semantics
ACPI, APEI, GHES: Cleanup ghes memory error handling
ACPI, APEI: Cleanup alignment-aware accesses
ACPI, APEI, GHES: Do not report only correctable errors with SCI
ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs interface
ACPI, eMCA: Combine eMCA/EDAC event reporting priority
EDAC, sb_edac: Modify H/W event reporting policy
EDAC: Add an edac_report parameter to EDAC
PCI, AER: Fix severity usage in aer trace event
x86, mce: Call put_device on device_register failure
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/acpi_extlog.c | 18 | ||||
-rw-r--r-- | drivers/acpi/apei/apei-base.c | 4 | ||||
-rw-r--r-- | drivers/acpi/apei/einj.c | 58 | ||||
-rw-r--r-- | drivers/acpi/apei/erst.c | 2 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 39 | ||||
-rw-r--r-- | drivers/edac/edac_stub.c | 19 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 6 |
7 files changed, 107 insertions, 39 deletions
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a6869e110ce5..5d33c5415405 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,6 +12,7 @@ #include <acpi/acpi_bus.h> #include <linux/cper.h> #include <linux/ratelimit.h> +#include <linux/edac.h> #include <asm/cpu.h> #include <asm/mce.h> @@ -43,6 +44,8 @@ struct extlog_l1_head { u8 rev1[12]; }; +static int old_edac_report_status; + static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; /* L1 table related physical address */ @@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); - return NOTIFY_DONE; + return NOTIFY_STOP; } static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) @@ -231,8 +234,12 @@ static int __init extlog_init(void) u64 cap; int rc; - rc = -ENODEV; + if (get_edac_report_status() == EDAC_REPORTING_FORCE) { + pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); + return -EPERM; + } + rc = -ENODEV; rdmsrl(MSR_IA32_MCG_CAP, cap); if (!(cap & MCG_ELOG_P)) return rc; @@ -287,6 +294,12 @@ static int __init extlog_init(void) if (elog_buf == NULL) goto err_release_elog; + /* + * eMCA event report method has higher priority than EDAC method, + * unless EDAC event report method is mandatory. 
+ */ + old_edac_report_status = get_edac_report_status(); + set_edac_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,6 +321,7 @@ err: static void __exit extlog_exit(void) { + set_edac_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 6d2c49b86b7f..e55584a072c6 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -41,6 +41,7 @@ #include <linux/rculist.h> #include <linux/interrupt.h> #include <linux/debugfs.h> +#include <asm/unaligned.h> #include "apei-internal.h" @@ -567,8 +568,7 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr, bit_offset = reg->bit_offset; access_size_code = reg->access_width; space_id = reg->space_id; - /* Handle possible alignment issues */ - memcpy(paddr, ®->address, sizeof(*paddr)); + *paddr = get_unaligned(®->address); if (!*paddr) { pr_warning(FW_BUG APEI_PFX "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n", diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index fb57d03e698b..7dcc8a824aae 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -34,6 +34,7 @@ #include <linux/delay.h> #include <linux/mm.h> #include <acpi/acpi.h> +#include <asm/unaligned.h> #include "apei-internal.h" @@ -216,7 +217,7 @@ static void check_vendor_extension(u64 paddr, static void *einj_get_parameter_address(void) { int i; - u64 paddrv4 = 0, paddrv5 = 0; + u64 pa_v4 = 0, pa_v5 = 0; struct acpi_whea_header *entry; entry = EINJ_TAB_ENTRY(einj_tab); @@ -225,30 +226,28 @@ static void *einj_get_parameter_address(void) entry->instruction == ACPI_EINJ_WRITE_REGISTER && entry->register_region.space_id == 
ACPI_ADR_SPACE_SYSTEM_MEMORY) - memcpy(&paddrv4, &entry->register_region.address, - sizeof(paddrv4)); + pa_v4 = get_unaligned(&entry->register_region.address); if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS && entry->instruction == ACPI_EINJ_WRITE_REGISTER && entry->register_region.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - memcpy(&paddrv5, &entry->register_region.address, - sizeof(paddrv5)); + pa_v5 = get_unaligned(&entry->register_region.address); entry++; } - if (paddrv5) { + if (pa_v5) { struct set_error_type_with_address *v5param; - v5param = acpi_os_map_memory(paddrv5, sizeof(*v5param)); + v5param = acpi_os_map_memory(pa_v5, sizeof(*v5param)); if (v5param) { acpi5 = 1; - check_vendor_extension(paddrv5, v5param); + check_vendor_extension(pa_v5, v5param); return v5param; } } - if (param_extension && paddrv4) { + if (param_extension && pa_v4) { struct einj_parameter *v4param; - v4param = acpi_os_map_memory(paddrv4, sizeof(*v4param)); + v4param = acpi_os_map_memory(pa_v4, sizeof(*v4param)); if (!v4param) return NULL; if (v4param->reserved1 || v4param->reserved2) { @@ -416,7 +415,8 @@ out: return rc; } -static int __einj_error_inject(u32 type, u64 param1, u64 param2) +static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, + u64 param3, u64 param4) { struct apei_exec_context ctx; u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT; @@ -446,6 +446,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) break; } v5param->flags = vendor_flags; + } else if (flags) { + v5param->flags = flags; + v5param->memory_address = param1; + v5param->memory_address_range = param2; + v5param->apicid = param3; + v5param->pcie_sbdf = param4; } else { switch (type) { case ACPI_EINJ_PROCESSOR_CORRECTABLE: @@ -514,11 +520,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) } /* Inject the specified hardware error */ -static int einj_error_inject(u32 type, u64 param1, u64 param2) +static int einj_error_inject(u32 type, u32 
flags, u64 param1, u64 param2, + u64 param3, u64 param4) { int rc; unsigned long pfn; + /* If user manually set "flags", make sure it is legal */ + if (flags && (flags & + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF))) + return -EINVAL; + /* * We need extra sanity checks for memory errors. * Other types leap directly to injection. @@ -532,7 +544,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2) if (type & ACPI5_VENDOR_BIT) { if (vendor_flags != SETWA_FLAGS_MEM) goto inject; - } else if (!(type & MEM_ERROR_MASK)) + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) goto inject; /* @@ -546,15 +558,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2) inject: mutex_lock(&einj_mutex); - rc = __einj_error_inject(type, param1, param2); + rc = __einj_error_inject(type, flags, param1, param2, param3, param4); mutex_unlock(&einj_mutex); return rc; } static u32 error_type; +static u32 error_flags; static u64 error_param1; static u64 error_param2; +static u64 error_param3; +static u64 error_param4; static struct dentry *einj_debug_dir; static int available_error_type_show(struct seq_file *m, void *v) @@ -648,7 +663,8 @@ static int error_inject_set(void *data, u64 val) if (!error_type) return -EINVAL; - return einj_error_inject(error_type, error_param1, error_param2); + return einj_error_inject(error_type, error_flags, error_param1, error_param2, + error_param3, error_param4); } DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, @@ -729,6 +745,10 @@ static int __init einj_init(void) rc = -ENOMEM; einj_param = einj_get_parameter_address(); if ((param_extension || acpi5) && einj_param) { + fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_flags); + if (!fentry) + goto err_unmap; fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir, &error_param1); if (!fentry) @@ -737,6 +757,14 @@ static int __init einj_init(void) einj_debug_dir, &error_param2); if (!fentry) goto 
err_unmap; + fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param3); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param4); + if (!fentry) + goto err_unmap; fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR, einj_debug_dir, &notrigger); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index cb1d557fc22c..ed65e9c4b5b0 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -611,7 +611,7 @@ static void __erst_record_id_cache_compact(void) if (entries[i] == APEI_ERST_INVALID_RECORD_ID) continue; if (wpos != i) - memcpy(&entries[wpos], &entries[i], sizeof(entries[i])); + entries[wpos] = entries[i]; wpos++; } erst_record_id_cache.len = wpos; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index a30bc313787b..46766ef7ef5d 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -413,27 +413,31 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev) { #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE unsigned long pfn; + int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata + 1); - if (sec_sev == GHES_SEV_CORRECTED && - (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && - (mem_err->validation_bits & CPER_MEM_VALID_PA)) { - pfn = mem_err->physical_addr >> PAGE_SHIFT; - if (pfn_valid(pfn)) - memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); - else if (printk_ratelimit()) - pr_warn(FW_WARN GHES_PFX - "Invalid address in generic error data: %#llx\n", - mem_err->physical_addr); - } - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - mem_err->validation_bits & CPER_MEM_VALID_PA) { - pfn = mem_err->physical_addr >> PAGE_SHIFT; - memory_failure_queue(pfn, 0, 0); + if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) + return; + + pfn = mem_err->physical_addr >> PAGE_SHIFT; + 
if (!pfn_valid(pfn)) { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Invalid address in generic error data: %#llx\n", + mem_err->physical_addr); + return; } + + /* iff following two events can be handled properly by now */ + if (sec_sev == GHES_SEV_CORRECTED && + (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) + flags = MF_SOFT_OFFLINE; + if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) + flags = 0; + + if (flags != -1) + memory_failure_queue(pfn, 0, flags); #endif } @@ -453,8 +457,7 @@ static void ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(ghes, sev, mem_err); #ifdef CONFIG_X86_MCE - apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, - mem_err); + apei_mce_report_mem_error(sev, mem_err); #endif ghes_handle_memory_failure(gdata, sev); } diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 351945fa2ecd..9d9e18aefaaa 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert); static atomic_t edac_subsys_valid = ATOMIC_INIT(0); +int edac_report_status = EDAC_REPORTING_ENABLED; +EXPORT_SYMBOL_GPL(edac_report_status); + +static int __init edac_report_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) + set_edac_report_status(EDAC_REPORTING_ENABLED); + else if (!strncmp(str, "off", 3)) + set_edac_report_status(EDAC_REPORTING_DISABLED); + else if (!strncmp(str, "force", 5)) + set_edac_report_status(EDAC_REPORTING_FORCE); + + return 0; +} +__setup("edac_report=", edac_report_setup); + /* * called to determine if there is an EDAC driver interested in * knowing an event (such as NMI) occurred diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index de988c8da1c8..54e2abe671f7 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; struct sbridge_pvt *pvt; + if 
(get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + mci = get_mci_for_node_id(mce->socketid); if (!mci) return NOTIFY_BAD; @@ -2142,9 +2145,10 @@ static int __init sbridge_init(void) opstate_init(); pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } |