From 4cbbdb51cc921f95978360fd7a0652d493dadc3e Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 18 Sep 2014 14:56:35 -0500 Subject: pci_ids: Add PCI device IDs for F15h M60h Add F3, F4 device IDs to be used in amd_nb.c and amd64_edac.c Signed-off-by: Aravind Gopalakrishnan Acked-by: Bjorn Helgaas Link: http://lkml.kernel.org/r/1411070195-10177-1-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1fa99a301817..97fb9f69aaed 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -522,6 +522,8 @@ #define PCI_DEVICE_ID_AMD_15H_M10H_F3 0x1403 #define PCI_DEVICE_ID_AMD_15H_M30H_NB_F3 0x141d #define PCI_DEVICE_ID_AMD_15H_M30H_NB_F4 0x141e +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F3 0x1573 +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F4 0x1574 #define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 -- cgit v1.2.3 From 15895a729e02ea55433b912cc31d5c6de16359ec Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 18 Sep 2014 14:56:45 -0500 Subject: x86, amd_nb: Add device IDs to NB tables for F15h M60h Add F3 and F4 PCI device IDs to amd_nb_misc_ids[] and amd_nb_link_ids[] respectively. Signed-off-by: Aravind Gopalakrishnan Acked-by: Thomas Gleixner Link: http://lkml.kernel.org/r/1411070205-10217-1-git-send-email-Aravind.Gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/amd_nb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index f04dbb3069b8..5caed1dd7ccf 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -21,6 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, {} @@ -30,6 +31,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, {} -- cgit v1.2.3 From 348fec70213835df18a587353b3bdc0481b37c6b Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 18 Sep 2014 14:56:58 -0500 Subject: EDAC: Add DDR3 LRDIMM entries to edac_mem_types F15hM60h adds support for DDR4 and DDR3 LRDIMMs. Add them here. Signed-off-by: Aravind Gopalakrishnan Link: http://lkml.kernel.org/r/1411070218-10258-1-git-send-email-Aravind.Gopalakrishnan@amd.com [ Boris: improve comments. ] Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 3 +++ include/linux/edac.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index c3893b0ddb18..129ff9c36a78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -146,6 +146,9 @@ const char * const edac_mem_types[] = { "Rambus XDR", "Unbuffered DDR3 RAM", "Registered DDR3 RAM", + "Load-Reduced DDR3 RAM", + "Unbuffered DDR4 RAM", + "Registered DDR4 RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/include/linux/edac.h b/include/linux/edac.h index e1e68da6f35c..da3b72e95db3 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -194,7 +194,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_DDR3: DDR3 RAM * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. - * @MEM_DDR4: DDR4 RAM + * @MEM_LRDDR3 Load-Reduced DDR3 memory. + * @MEM_DDR4: Unbuffered DDR4 RAM * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. */ @@ -216,6 +217,7 @@ enum mem_type { MEM_XDR, MEM_DDR3, MEM_RDDR3, + MEM_LRDDR3, MEM_DDR4, MEM_RDDR4, }; -- cgit v1.2.3 From 4cfc3a40f723761a5d374adc618245dca5c895fb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 30 Sep 2014 20:38:54 +0200 Subject: EDAC: Sync memory types and names Make keeping the sync between the mem_types enum and the actual string names simpler by using designated initializers. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 129ff9c36a78..1747906f10ce 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -125,30 +125,27 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci) #endif /* CONFIG_EDAC_DEBUG */ -/* - * keep those in sync with the enum mem_type - */ const char * const edac_mem_types[] = { - "Empty csrow", - "Reserved csrow type", - "Unknown csrow type", - "Fast page mode RAM", - "Extended data out RAM", - "Burst Extended data out RAM", - "Single data rate SDRAM", - "Registered single data rate SDRAM", - "Double data rate SDRAM", - "Registered Double data rate SDRAM", - "Rambus DRAM", - "Unbuffered DDR2 RAM", - "Fully buffered DDR2", - "Registered DDR2 RAM", - "Rambus XDR", - "Unbuffered DDR3 RAM", - "Registered DDR3 RAM", - "Load-Reduced DDR3 RAM", - "Unbuffered DDR4 RAM", - "Registered DDR4 RAM", + [MEM_EMPTY] = "Empty csrow", + [MEM_RESERVED] = "Reserved csrow type", + [MEM_UNKNOWN] = "Unknown csrow type", + [MEM_FPM] = "Fast page mode RAM", + [MEM_EDO] = "Extended data out RAM", + [MEM_BEDO] = "Burst Extended data out RAM", + [MEM_SDR] = "Single data rate SDRAM", + [MEM_RDR] = "Registered single data rate SDRAM", + [MEM_DDR] = "Double data rate SDRAM", + [MEM_RDDR] = "Registered Double data rate SDRAM", + [MEM_RMBS] = "Rambus DRAM", + [MEM_DDR2] = "Unbuffered DDR2 RAM", + [MEM_FB_DDR2] = "Fully buffered DDR2", + [MEM_RDDR2] = "Registered DDR2 RAM", + [MEM_XDR] = "Rambus XDR", + [MEM_DDR3] = "Unbuffered DDR3 RAM", + [MEM_RDDR3] = "Registered DDR3 RAM", + [MEM_LRDDR3] = "Load-Reduced DDR3 RAM", + [MEM_DDR4] = "Unbuffered DDR4 RAM", + [MEM_RDDR4] = "Registered DDR4 RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); -- cgit v1.2.3 From 5c43cbdf78b55f9de3e3e9546c9f4e909d1d31be Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Wed, 1 Oct 2014 12:24:03 +0200 Subject: {mv64x60,ppc4xx}_edac,: Remove deprecated IRQF_DISABLED It's a NOOP since 2.6.35. Signed-off-by: Michael Opdenacker Link: http://lkml.kernel.org/r/1412159043-7348-1-git-send-email-michael.opdenacker@free-electrons.com Signed-off-by: Borislav Petkov --- drivers/edac/mv64x60_edac.c | 8 ++++---- drivers/edac/ppc4xx_edac.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 542fad70e360..6366e880f978 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -178,7 +178,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_pci_isr, - IRQF_DISABLED, + 0, "[EDAC] PCI err", pci); if (res < 0) { @@ -345,7 +345,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_sram_isr, - IRQF_DISABLED, + 0, "[EDAC] SRAM err", edac_dev); if (res < 0) { @@ -540,7 +540,7 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_cpu_isr, - IRQF_DISABLED, + 0, "[EDAC] CPU err", edac_dev); if (res < 0) { @@ -800,7 +800,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_mc_isr, - IRQF_DISABLED, + 0, "[EDAC] MC err", mci); if (res < 0) { diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 0f04d5ead521..41593539cec4 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1120,7 +1120,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op, status = request_irq(ded_irq, ppc4xx_edac_isr, - IRQF_DISABLED, + 0, "[EDAC] MC ECCDED", mci); @@ -1134,7 +1134,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op, status = request_irq(sec_irq, ppc4xx_edac_isr, - IRQF_DISABLED, + 0, "[EDAC] MC ECCSEC", mci); -- cgit v1.2.3 From a597d2a5d9820dbbadd70583170c48c7290427df Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Thu, 30 Oct 2014 12:16:09 +0100 Subject: amd64_edac: Add F15h M60h support This patch adds support for ECC error decoding for F15h M60h processor. Aside from the usual changes, the patch adds support for some new features in the processor: - DDR4(unbuffered, registered); LRDIMM DDR3 support - relevant debug messages have been modified/added to report these memory types - new dbam_to_cs mappers - if (F15h M60h && LRDIMM); we need a 'multiplier' value to find cs_size. This multiplier value is obtained from the per-dimm DCSM register. So, change the interface to accept a 'cs_mask_nr' value to facilitate this calculation - switch-casing determine_memory_type() - done to cleanse the function of too many if-else statements and improve readability - This is now called early in read_mc_regs() to cache dram_type Misc cleanup: - amd64_pci_table[] is condensed by using PCI_VDEVICE macro. Testing details: Tested the patch by injecting 'ECC' type errors using mce_amd_inj and error decoding works fine. Signed-off-by: Aravind Gopalakrishnan Link: http://lkml.kernel.org/r/1414617483-4941-1-git-send-email-Aravind.Gopalakrishnan@amd.com [ Boris: determine_memory_type() cleanups ] Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 255 ++++++++++++++++++++++++++++++++-------------- drivers/edac/amd64_edac.h | 15 ++- 2 files changed, 188 insertions(+), 82 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index bbd65149cdb2..1a1d7c43a20f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -692,9 +692,19 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); - edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n", - (dclr & BIT(16)) ? "un" : "", - (dclr & BIT(19)) ? "yes" : "no"); + if (pvt->dram_type == MEM_LRDDR3) { + u32 dcsm = pvt->csels[chan].csmasks[0]; + /* + * It's assumed all LRDIMMs in a DCT are going to be of + * same 'type' until proven otherwise. So, use a cs + * value of '0' here to get dcsm value. + */ + edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); + } + + edac_dbg(1, "All DIMMs support ECC:%s\n", + (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, " PAR/ERR parity: %s\n", (dclr & BIT(8)) ? "enabled" : "disabled"); @@ -756,7 +766,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; - } else if (pvt->fam == 0x15 && pvt->model >= 0x30) { + } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; } else { @@ -813,25 +823,63 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs) +static void determine_memory_type(struct amd64_pvt *pvt) { - enum mem_type type; + u32 dram_ctrl, dcsm; - /* F15h supports only DDR3 */ - if (pvt->fam >= 0x15) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) { + switch (pvt->fam) { + case 0xf: + if (pvt->ext_model >= K8_REV_F) + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; + return; + + case 0x10: if (pvt->dchr0 & DDR3_MODE) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; + return; + + case 0x15: + if (pvt->model < 0x60) + goto ddr3; + + /* + * Model 0x60h needs special handling: + * + * We use a Chip Select value of '0' to obtain dcsm. + * Theoretically, it is possible to populate LRDIMMs of different + * 'Rank' value on a DCT. But this is not the common case. So, + * it's reasonable to assume all DIMMs are going to be of same + * 'type' until proven otherwise. + */ + amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl); + dcsm = pvt->csels[0].csmasks[0]; + + if (((dram_ctrl >> 8) & 0x7) == 0x2) + pvt->dram_type = MEM_DDR4; + else if (pvt->dclr0 & BIT(16)) + pvt->dram_type = MEM_DDR3; + else if (dcsm & 0x3) + pvt->dram_type = MEM_LRDDR3; else - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; - } else { - type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; - } + pvt->dram_type = MEM_RDDR3; - amd64_info("CS%d: %s\n", cs, edac_mem_types[type]); + return; + + case 0x16: + goto ddr3; + + default: + WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); + pvt->dram_type = MEM_EMPTY; + } + return; - return type; +ddr3: + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; } /* Get the number of DCT channels the memory controller is using. */ @@ -958,8 +1006,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) if (WARN_ON(!nb)) return; - pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 - : PCI_DEVICE_ID_AMD_15H_NB_F1; + if (pvt->model == 0x60) + pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + else if (pvt->model == 0x30) + pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + else + pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1; f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); if (WARN_ON(!f1)) @@ -1049,7 +1101,7 @@ static int ddr2_cs_size(unsigned i, bool dct_width) } static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; @@ -1167,8 +1219,43 @@ static int ddr3_cs_size(unsigned i, bool dct_width) return cs_size; } +static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply) +{ + unsigned shift = 0; + int cs_size = 0; + + if (i < 4 || i == 6) + cs_size = -1; + else if (i == 12) + shift = 7; + else if (!(i & 0x1)) + shift = i >> 1; + else + shift = (i + 1) >> 1; + + if (cs_size != -1) + cs_size = rank_multiply * (128 << shift); + + return cs_size; +} + +static int ddr4_cs_size(unsigned i) +{ + int cs_size = 0; + + if (i == 0) + cs_size = -1; + else if (i == 1) + cs_size = 1024; + else + /* Min cs_size = 1G */ + cs_size = 1024 * (1 << (i >> 1)); + + return cs_size; +} + static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; @@ -1184,18 +1271,49 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, * F15h supports only 64bit DCT interfaces */ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); return ddr3_cs_size(cs_mode, false); } +/* F15h M60h supports DDR4 mapping as well.. */ +static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode, int cs_mask_nr) +{ + int cs_size; + u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr]; + + WARN_ON(cs_mode > 12); + + if (pvt->dram_type == MEM_DDR4) { + if (cs_mode > 9) + return -1; + + cs_size = ddr4_cs_size(cs_mode); + } else if (pvt->dram_type == MEM_LRDDR3) { + unsigned rank_multiply = dcsm & 0xf; + + if (rank_multiply == 3) + rank_multiply = 4; + cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply); + } else { + /* Minimum cs size is 512mb for F15hM60h*/ + if (cs_mode == 0x1) + return -1; + + cs_size = ddr3_cs_size(cs_mode, false); + } + + return cs_size; +} + /* * F16h and F15h model 30h have only limited cs_modes. */ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); @@ -1757,13 +1875,20 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) size0 = 0; if (dcsb[dimm*2] & DCSB_CS_ENABLE) + /* For f15m60h, need multiplier for LRDIMM cs_size + * calculation. We pass 'dimm' value to the dbam_to_cs + * mapper so we can find the multiplier from the + * corresponding DCSM. + */ size0 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); + DBAM_DIMM(dimm, dbam), + dimm); size1 = 0; if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) size1 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); + DBAM_DIMM(dimm, dbam), + dimm); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", dimm * 2, size0, @@ -1812,6 +1937,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f16_dbam_to_chip_select, } }, + [F15_M60H_CPUS] = { + .ctl_name = "F15h_M60h", + .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f15_m60h_dbam_to_chip_select, + } + }, [F16_CPUS] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, @@ -2175,6 +2310,8 @@ static void read_mc_regs(struct amd64_pvt *pvt) } pvt->ecc_sym_sz = 4; + determine_memory_type(pvt); + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); if (pvt->fam >= 0x10) { amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); @@ -2238,7 +2375,8 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) */ cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2)) + << (20 - PAGE_SHIFT); edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", csrow_nr, dct, cs_mode); @@ -2257,7 +2395,6 @@ static int init_csrows(struct mem_ctl_info *mci) struct csrow_info *csrow; struct dimm_info *dimm; enum edac_type edac_mode; - enum mem_type mtype; int i, j, empty = 1; int nr_pages = 0; u32 val; @@ -2302,8 +2439,6 @@ static int init_csrows(struct mem_ctl_info *mci) nr_pages += row_dct1_pages; } - mtype = determine_memory_type(pvt, i); - edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* @@ -2317,7 +2452,7 @@ static int init_csrows(struct mem_ctl_info *mci) for (j = 0; j < pvt->channel_count; j++) { dimm = csrow->channels[j]->dimm; - dimm->mtype = mtype; + dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; } } @@ -2604,6 +2739,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F15_M30H_CPUS]; pvt->ops = &family_types[F15_M30H_CPUS].ops; break; + } else if (pvt->model == 0x60) { + fam_type = &family_types[F15_M60H_CPUS]; + pvt->ops = &family_types[F15_M60H_CPUS].ops; + break; } fam_type = &family_types[F15_CPUS]; @@ -2828,55 +2967,13 @@ static void remove_one_instance(struct pci_dev *pdev) * inquiry this table to see if this driver is for a given device found. */ static const struct pci_device_id amd64_pci_table[] = { - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_10H_NB_DRAM, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_15H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_16H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) }, {0, } }; MODULE_DEVICE_TABLE(pci, amd64_pci_table); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 55fb5941c6d4..d8468c667925 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -162,10 +162,12 @@ /* * PCI-defined configuration space registers */ -#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b -#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F1 0x1571 +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F2 0x1572 #define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531 #define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 @@ -221,6 +223,8 @@ #define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define DRAM_CONTROL 0x78 + #define DBAM0 0x80 #define DBAM1 0x180 @@ -301,6 +305,7 @@ enum amd_families { F10_CPUS, F15_CPUS, F15_M30H_CPUS, + F15_M60H_CPUS, F16_CPUS, F16_M30H_CPUS, NUM_FAMILIES, @@ -379,6 +384,9 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; + + /* cache the dram_type */ + enum mem_type dram_type; }; enum err_codes { @@ -480,7 +488,8 @@ struct low_ops { int (*early_channel_count) (struct amd64_pvt *pvt); void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, struct err_info *); - int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); + int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode, int cs_mask_nr); }; struct amd64_family_type { -- cgit v1.2.3 From bc4febe93c2fd7d0e74dad773bad2ed0237780ee Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Tue, 4 Nov 2014 11:41:08 -0600 Subject: EDAC, MCE, AMD: Add decoding table for MC6 xec Extended error code meanings are tabulated for other banks. Extend that tradition for MC6 too. Signed-off-by: Aravind Gopalakrishnan Link: http://lkml.kernel.org/r/1415122868-10969-1-git-send-email-aravind.gopalakrishnan@amd.com Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index f78c1c54dbd5..5d4efae864e4 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -138,6 +138,15 @@ static const char * const mc5_mce_desc[] = { "Retire status queue" }; +static const char * const mc6_mce_desc[] = { + "Hardware Assertion", + "Free List", + "Physical Register File", + "Retire Queue", + "Scheduler table", + "Status Register File", +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -672,38 +681,10 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "MC6 Error: "); - switch (xec) { - case 0x0: - pr_cont("Hardware Assertion"); - break; - - case 0x1: - pr_cont("Free List"); - break; - - case 0x2: - pr_cont("Physical Register File"); - break; - - case 0x3: - pr_cont("Retire Queue"); - break; - - case 0x4: - pr_cont("Scheduler table"); - break; - - case 0x5: - pr_cont("Status Register File"); - break; - - default: + if (xec > 0x5) goto wrong_mc6_mce; - break; - } - - pr_cont(" parity error.\n"); + pr_cont("%s parity error.\n", mc6_mce_desc[xec]); return; wrong_mc6_mce: -- cgit v1.2.3 From f5b10c45ef80c0630ed5318a386da65dde0a1582 Mon Sep 17 00:00:00 2001 From: Tomasz Pala Date: Sun, 2 Nov 2014 11:22:12 +0100 Subject: amd64_edac: Build module on x86-32 By popular demand, enable amd64_edac on 32-bit too. Boris: - update Kconfig text. - add a warning on load which states that 32-bit configurations are unsupported. Signed-off-by: Tomasz Pala Link: http://lkml.kernel.org/r/20141102102212.GA7034@polanet.pl Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 6 +++--- drivers/edac/amd64_edac.c | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 7072c2892d63..4316c9e955b3 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -105,11 +105,11 @@ config EDAC_GHES In doubt, say 'Y'. config EDAC_AMD64 - tristate "AMD64 (Opteron, Athlon64) K8, F10h" - depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE + tristate "AMD64 (Opteron, Athlon64)" + depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on - the AMD64 families of memory controllers (K8 and F10h) + the AMD64 families (>= K8) of memory controllers. config EDAC_AMD64_ERROR_INJECTION bool "Sysfs HW Error injection facilities" diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1a1d7c43a20f..17638d7cf5c2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3035,6 +3035,11 @@ static int __init amd64_edac_init(void) goto err_no_instances; setup_pci_device(); + +#ifdef CONFIG_X86_32 + amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); +#endif + return 0; err_no_instances: -- cgit v1.2.3 From 665aa8cdc499b9aeea6532e682a58ca34b7f94e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Aug 2014 11:25:14 +0300 Subject: ghes_edac: Use snprintf() to silence a static checker warning My static checker complains because the "e->location" has up to 256 characters but we are copying it into the "pvt->detail_location" which only has space for 240 characters. That's not counting the surrounding text and the "e->other_detail" string which can be over 80 characters long. I am not familiar with this code but presumably it normally works. Let's add a limit though for safety. Signed-off-by: Dan Carpenter Acked-by: Mauro Carvalho Chehab Link: http://lkml.kernel.org/r/20140801082514.GD28869@mwanda Signed-off-by: Borislav Petkov --- drivers/edac/ghes_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 8399b4e16fe0..b24681998740 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -413,8 +413,8 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Generate the trace event */ grain_bits = fls_long(e->grain); - sprintf(pvt->detail_location, "APEI location: %s %s", - e->location, e->other_detail); + snprintf(pvt->detail_location, sizeof(pvt->detail_location), + "APEI location: %s %s", e->location, e->other_detail); trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, -- cgit v1.2.3 From 19ca5a3cc425cc9a8abedb0f4fb7b4e7ceee2255 Mon Sep 17 00:00:00 2001 From: Andreas Ruprecht Date: Sun, 10 Aug 2014 21:10:03 +0200 Subject: EDAC, pci_sysfs: remove unneccessary ifdef around entire file The file edac_pci_sysfs.c is dependent on CONFIG_PCI. This is already modelled in the Makefile, but edac_pci_sysfs.o is still contained in the list of files compiled even without CONFIG_PCI. This change removes edac_pci_sysfs.o from the list of built objects when not having CONFIG_PCI enabled and removes the then-unnecessary ifdef from the source file. Signed-off-by: Andreas Ruprecht Link: http://lkml.kernel.org/r/1407697803-3837-1-git-send-email-rupran@einserver.de Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 2 +- drivers/edac/edac_pci_sysfs.c | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 359aa499b200..d40c69a04df7 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o -edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o ifdef CONFIG_PCI diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index e8658e451762..24d877f6e577 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -14,9 +14,6 @@ #include "edac_core.h" #include "edac_module.h" -/* Turn off this whole feature if PCI is not configured */ -#ifdef CONFIG_PCI - #define EDAC_PCI_SYMLINK "device" /* data variables exported via sysfs */ @@ -761,5 +758,3 @@ MODULE_PARM_DESC(check_pci_errors, module_param(edac_pci_panic_on_pe, int, 0644); MODULE_PARM_DESC(edac_pci_panic_on_pe, "Panic on PCI Bus Parity error: 0=off 1=on"); - -#endif /* CONFIG_PCI */ -- cgit v1.2.3 From 0a98babd85b2e69d64ef6f2f8a5d029ddaa702e1 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 19 Nov 2014 16:00:13 +0100 Subject: EDAC: Delete unnecessary check before calling pci_dev_put() The pci_dev_put() function tests whether its argument is NULL and then returns immediately. Thus the test before the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Link: http://lkml.kernel.org/r/546CB20D.4070808@users.sourceforge.net [ Boris: commit message. ] Signed-off-by: Borislav Petkov --- drivers/edac/i3000_edac.c | 3 +-- drivers/edac/i3200_edac.c | 3 +-- drivers/edac/i82443bxgx_edac.c | 3 +-- drivers/edac/x38_edac.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index cd28b968e5c7..5cb36a6022cc 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -542,8 +542,7 @@ fail1: pci_unregister_driver(&i3000_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 022a70273ada..d1ff0ea2d3f3 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -523,8 +523,7 @@ fail1: pci_unregister_driver(&i3200_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index d730e276d1a8..b4705d9366bf 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -458,8 +458,7 @@ static void __exit i82443bxgx_edacmc_exit(void) if (!i82443bxgx_registered) i82443bxgx_edacmc_remove_one(mci_pdev); - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); } module_init(i82443bxgx_edacmc_init); diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index e644b52c287c..7c5cdc62f31c 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -500,8 +500,7 @@ fail1: pci_unregister_driver(&x38_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } -- cgit v1.2.3 From fd19fcd6324d29c97ba69ac3433e392989af401c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 22 Nov 2014 11:09:12 +0100 Subject: EDAC, mce_amd_inj: Convert mce_amd_inj module to debugfs This module's interface belongs in debugfs, not in sysfs. Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 10 +-- drivers/edac/mce_amd_inj.c | 180 ++++++++++++++++++++------------------------- 2 files changed, 83 insertions(+), 107 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 4316c9e955b3..49c265255a07 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -61,14 +61,14 @@ config EDAC_DECODE_MCE has been initialized. config EDAC_MCE_INJ - tristate "Simple MCE injection interface over /sysfs" - depends on EDAC_DECODE_MCE + tristate "Simple MCE injection interface" + depends on EDAC_DECODE_MCE && DEBUG_FS default n help - This is a simple interface to inject MCEs over /sysfs and test - the MCE decoding code in EDAC. + This is a simple debugfs interface to inject MCEs and test different + aspects of the MCE handling code. - This is currently AMD-only. + WARNING: Do not even assume this interface is staying stable! config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 5e46a9fea31b..9b5023d6f553 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -1,173 +1,149 @@ /* - * A simple MCE injection facility for testing the MCE decoding code. This - * driver should be built as module so that it can be loaded on production - * kernels for testing purposes. + * A simple MCE injection facility for testing different aspects of the RAS + * code. This driver should be built as module so that it can be loaded + * on production kernels for testing purposes. * * This file may be distributed under the terms of the GNU General Public * License version 2. * - * Copyright (c) 2010: Borislav Petkov + * Copyright (c) 2010-14: Borislav Petkov * Advanced Micro Devices Inc. */ #include +#include #include -#include #include #include #include "mce_amd.h" -struct edac_mce_attr { - struct attribute attr; - ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf); - ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr, - const char *buf, size_t count); -}; - -#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \ -static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store) - -static struct kobject *mce_kobj; - /* * Collect all the MCi_XXX settings */ static struct mce i_mce; +static struct dentry *dfs_inj; -#define MCE_INJECT_STORE(reg) \ -static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - const char *data, size_t count)\ +#define MCE_INJECT_SET(reg) \ +static int inj_##reg##_set(void *data, u64 val) \ { \ - int ret = 0; \ - unsigned long value; \ - \ - ret = kstrtoul(data, 16, &value); \ - if (ret < 0) \ - printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ - \ - i_mce.reg = value; \ + struct mce *m = (struct mce *)data; \ \ - return count; \ + m->reg = val; \ + return 0; \ } -MCE_INJECT_STORE(status); -MCE_INJECT_STORE(misc); -MCE_INJECT_STORE(addr); +MCE_INJECT_SET(status); +MCE_INJECT_SET(misc); +MCE_INJECT_SET(addr); -#define MCE_INJECT_SHOW(reg) \ -static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - char *buf) \ +#define MCE_INJECT_GET(reg) \ +static int inj_##reg##_get(void *data, u64 *val) \ { \ - return sprintf(buf, "0x%016llx\n", i_mce.reg); \ + struct mce *m = (struct mce *)data; \ + \ + *val = m->reg; \ + return 0; \ } -MCE_INJECT_SHOW(status); -MCE_INJECT_SHOW(misc); -MCE_INJECT_SHOW(addr); +MCE_INJECT_GET(status); +MCE_INJECT_GET(misc); +MCE_INJECT_GET(addr); -EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store); -EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store); -EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store); +DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n"); /* * This denotes into which bank we're injecting and triggers * the injection, at the same time. */ -static ssize_t edac_inject_bank_store(struct kobject *kobj, - struct edac_mce_attr *attr, - const char *data, size_t count) +static int inj_bank_set(void *data, u64 val) { - int ret = 0; - unsigned long value; + struct mce *m = (struct mce *)data; - ret = kstrtoul(data, 10, &value); - if (ret < 0) { - printk(KERN_ERR "Invalid bank value!\n"); - return -EINVAL; - } - - if (value > 5) - if (boot_cpu_data.x86 != 0x15 || value > 6) { - printk(KERN_ERR "Non-existent MCE bank: %lu\n", value); + if (val > 5) { + if (boot_cpu_data.x86 != 0x15 || val > 6) { + pr_err("Non-existent MCE bank: %llu\n", val); return -EINVAL; } + } - i_mce.bank = value; + m->bank = val; - amd_decode_mce(NULL, 0, &i_mce); + amd_decode_mce(NULL, 0, m); - return count; + return 0; } -static ssize_t edac_inject_bank_show(struct kobject *kobj, - struct edac_mce_attr *attr, char *buf) +static int inj_bank_get(void *data, u64 *val) { - return sprintf(buf, "%d\n", i_mce.bank); -} + struct mce *m = (struct mce *)data; -EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store); + *val = m->bank; + return 0; +} -static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc, - &mce_attr_addr, &mce_attr_bank +DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n"); + +struct dfs_node { + char *name; + struct dentry *d; + const struct file_operations *fops; +} dfs_fls[] = { + { .name = "status", .fops = &status_fops }, + { .name = "misc", .fops = &misc_fops }, + { .name = "addr", .fops = &addr_fops }, + { .name = "bank", .fops = &bank_fops }, }; -static int __init edac_init_mce_inject(void) +static int __init init_mce_inject(void) { - struct bus_type *edac_subsys = NULL; - int i, err = 0; + int i; - edac_subsys = edac_get_sysfs_subsys(); - if (!edac_subsys) + dfs_inj = debugfs_create_dir("mce-inject", NULL); + if (!dfs_inj) return -EINVAL; - mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj); - if (!mce_kobj) { - printk(KERN_ERR "Error creating a mce kset.\n"); - err = -ENOMEM; - goto err_mce_kobj; - } + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { + dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, + S_IRUSR | S_IWUSR, + dfs_inj, + &i_mce, + dfs_fls[i].fops); - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) { - err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr); - if (err) { - printk(KERN_ERR "Error creating %s in sysfs.\n", - sysfs_attrs[i]->attr.name); - goto err_sysfs_create; - } + if (!dfs_fls[i].d) + goto err_dfs_add; } + return 0; -err_sysfs_create: +err_dfs_add: while (--i >= 0) - sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); + debugfs_remove(dfs_fls[i].d); - kobject_del(mce_kobj); + debugfs_remove(dfs_inj); + dfs_inj = NULL; -err_mce_kobj: - edac_put_sysfs_subsys(); - - return err; + return -ENOMEM; } -static void __exit edac_exit_mce_inject(void) +static void __exit exit_mce_inject(void) { int i; - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) - sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) + debugfs_remove(dfs_fls[i].d); - kobject_del(mce_kobj); + memset(&dfs_fls, 0, sizeof(dfs_fls)); - edac_put_sysfs_subsys(); + debugfs_remove(dfs_inj); + dfs_inj = NULL; } - -module_init(edac_init_mce_inject); -module_exit(edac_exit_mce_inject); +module_init(init_mce_inject); +module_exit(exit_mce_inject); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Borislav Petkov "); MODULE_AUTHOR("AMD Inc."); -MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); +MODULE_DESCRIPTION("MCE injection facility for RAS testing"); -- cgit v1.2.3 From 21690934d93408bb7247943f886b960cf30ecd19 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 22 Nov 2014 11:22:35 +0100 Subject: EDAC, mce_amd_inj: Enable direct writes to MCE MSRs Normally, writing those causes a #GP but HWCR[McStatusWrEn] controls that. Provide a knob. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd_inj.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 9b5023d6f553..9b5ca92737a2 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -54,6 +54,30 @@ DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n"); +/* + * Caller needs to be make sure this cpu doesn't disappear + * from under us, i.e.: get_cpu/put_cpu. + */ +static int toggle_hw_mce_inject(unsigned int cpu, bool enable) +{ + u32 l, h; + int err; + + err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h); + if (err) { + pr_err("%s: error reading HWCR\n", __func__); + return err; + } + + enable ? (l |= BIT(18)) : (l &= ~BIT(18)); + + err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h); + if (err) + pr_err("%s: error writing HWCR\n", __func__); + + return err; +} + /* * This denotes into which bank we're injecting and triggers * the injection, at the same time. -- cgit v1.2.3 From b18f3864bfb4ef2af0e25b6ed5e1292f2ac36878 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 22 Nov 2014 11:35:26 +0100 Subject: EDAC, mce_amd_inj: Add hw-injection attributes Expose struct mce->inject_flags. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd_inj.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 9b5ca92737a2..5a758d63a8cc 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -78,6 +78,44 @@ static int toggle_hw_mce_inject(unsigned int cpu, bool enable) return err; } +static int flags_get(void *data, u64 *val) +{ + struct mce *m = (struct mce *)data; + + *val = m->inject_flags; + + return 0; +} + +static int flags_set(void *data, u64 val) +{ + struct mce *m = (struct mce *)data; + + m->inject_flags = (u8)val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(flags_fops, flags_get, flags_set, "%llu\n"); + +/* + * On which CPU to inject? + */ +MCE_INJECT_GET(extcpu); + +static int inj_extcpu_set(void *data, u64 val) +{ + struct mce *m = (struct mce *)data; + + if (val >= nr_cpu_ids || !cpu_online(val)) { + pr_err("%s: Invalid CPU: %llu\n", __func__, val); + return -EINVAL; + } + m->extcpu = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n"); + /* * This denotes into which bank we're injecting and triggers * the injection, at the same time. @@ -119,6 +157,8 @@ struct dfs_node { { .name = "misc", .fops = &misc_fops }, { .name = "addr", .fops = &addr_fops }, { .name = "bank", .fops = &bank_fops }, + { .name = "flags", .fops = &flags_fops }, + { .name = "cpu", .fops = &extcpu_fops }, }; static int __init init_mce_inject(void) -- cgit v1.2.3 From 51756a5034c12da3ce1286749b0e05de8bfbbbaf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 22 Nov 2014 11:47:07 +0100 Subject: EDAC, mce_amd_inj: Add an injector function Selectively inject either a real MCE or a sw-only version which exercises the decoding path only. The hardware-injected MCE triggers a machine check exception (#MC) so that the MCE handler can be bothered to do something too. Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd_inj.c | 53 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 5a758d63a8cc..0bd91a802c67 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "mce_amd.h" @@ -116,6 +117,55 @@ static int inj_extcpu_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n"); +static void trigger_mce(void *info) +{ + asm volatile("int $18"); +} + +static void do_inject(void) +{ + u64 mcg_status = 0; + unsigned int cpu = i_mce.extcpu; + u8 b = i_mce.bank; + + if (!(i_mce.inject_flags & MCJ_EXCEPTION)) { + amd_decode_mce(NULL, 0, &i_mce); + return; + } + + get_online_cpus(); + if (!cpu_online(cpu)) + goto err; + + /* prep MCE global settings for the injection */ + mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; + + if (!(i_mce.status & MCI_STATUS_PCC)) + mcg_status |= MCG_STATUS_RIPV; + + toggle_hw_mce_inject(cpu, true); + + wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, + (u32)mcg_status, (u32)(mcg_status >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), + (u32)i_mce.status, (u32)(i_mce.status >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), + (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), + (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); + + toggle_hw_mce_inject(cpu, false); + + smp_call_function_single(cpu, trigger_mce, NULL, 0); + +err: + put_online_cpus(); + +} + /* * This denotes into which bank we're injecting and triggers * the injection, at the same time. @@ -132,8 +182,7 @@ static int inj_bank_set(void *data, u64 val) } m->bank = val; - - amd_decode_mce(NULL, 0, m); + do_inject(); return 0; } -- cgit v1.2.3 From 50872ccd8786dc72bc5a32c17695561e031fae4c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 22 Nov 2014 13:41:01 +0100 Subject: EDAC, MCE, AMD: Correct formatting of decoded text Write out MCx_ADDR into the more humanly readable "MCx Error Address" and remove double colon in the output. Cc: Aravind Gopalakrishnan Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 5d4efae864e4..58586d59bf8e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -441,8 +441,8 @@ static bool k8_mc2_mce(u16 ec, u8 xec) pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); else if (xec == 0x0) { if (TLB_ERROR(ec)) - pr_cont(": %s error in a Page Descriptor Cache or " - "Guest TLB.\n", TT_MSG(ec)); + pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n", + TT_MSG(ec)); else if (BUS_ERROR(ec)) pr_cont(": %s/ECC error in data read from NB: %s.\n", R4_MSG(ec), PP_MSG(ec)); @@ -781,7 +781,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr); if (!fam_ops) goto err_code; -- cgit v1.2.3