aboutsummaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
authorLinus Torvalds2019-03-08 09:07:07 -0800
committerLinus Torvalds2019-03-08 09:07:07 -0800
commit1b37b8c48d2c2d8553f116ec2a75d21056f1fb35 (patch)
treeadf2855d311440fb4c48b2a96a13bdaae28d2b63 /drivers/edac
parentc6400e5cef5eafc259e649ceedc4c7eecc9069d8 (diff)
parent580b5cf50ca8f4781961382d54959683341b3126 (diff)
Merge tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov: - A new EDAC AST 2500 SoC driver (Stefan M Schaeckeler) - New i10nm EDAC driver for Intel 10nm CPUs (Qiuxu Zhuo and Tony Luck) - Altera SDRAM functionality carveout for separate enablement of RAS and SDRAM capabilities on some Altera chips. (Thor Thayer) - The usual round of cleanups and fixes And last but not least: recruit James Morse as a reviewer for the ARM side. * tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: EDAC/altera: Add separate SDRAM EDAC config EDAC, altera: Add missing of_node_put() EDAC, skx_common: Add code to recognise new compound error code EDAC, i10nm: Fix randconfig builds EDAC, i10nm: Add a driver for Intel 10nm server processors EDAC, skx_edac: Delete duplicated code EDAC, skx_common: Separate common code out from skx_edac EDAC: Do not check return value of debugfs_create() functions EDAC: Add James Morse as a reviewer dt-bindings, EDAC: Add Aspeed AST2500 EDAC, aspeed: Add an Aspeed AST2500 EDAC driver
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig35
-rw-r--r--drivers/edac/Makefile8
-rw-r--r--drivers/edac/altera_edac.c72
-rw-r--r--drivers/edac/aspeed_edac.c421
-rw-r--r--drivers/edac/debugfs.c48
-rw-r--r--drivers/edac/edac_module.h8
-rw-r--r--drivers/edac/i10nm_base.c275
-rw-r--r--drivers/edac/skx_base.c650
-rw-r--r--drivers/edac/skx_common.c691
-rw-r--r--drivers/edac/skx_common.h152
-rw-r--r--drivers/edac/skx_edac.c1358
11 files changed, 2284 insertions, 1434 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e286b5b99003..47eb4d13ed5f 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -241,6 +241,18 @@ config EDAC_SKX
system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT.
+config EDAC_I10NM
+ tristate "Intel 10nm server Integrated MC"
+ depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI
+ depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y
+ select DMI
+ select ACPI_ADXL
+ help
+ Support for error detection and correction on the Intel
+ 10nm server Integrated Memory Controllers. If your
+ system has non-volatile DIMMs you should also manually
+ select CONFIG_ACPI_NFIT.
+
config EDAC_PND2
tristate "Intel Pondicherry2"
depends on PCI && X86_64 && X86_MCE_INTEL
@@ -379,9 +391,17 @@ config EDAC_ALTERA
depends on EDAC=y && (ARCH_SOCFPGA || ARCH_STRATIX10)
help
Support for error detection and correction on the
- Altera SOCs. This must be selected for SDRAM ECC.
- Note that the preloader must initialize the SDRAM
- before loading the kernel.
+ Altera SOCs. This is the global enable for the
+ various Altera peripherals.
+
+config EDAC_ALTERA_SDRAM
+ bool "Altera SDRAM ECC"
+ depends on EDAC_ALTERA=y
+ help
+ Support for error detection and correction on the
+ Altera SDRAM Memory for Altera SoCs. Note that the
+ preloader must initialize the SDRAM before loading
+ the kernel.
config EDAC_ALTERA_L2C
bool "Altera L2 Cache ECC"
@@ -475,4 +495,13 @@ config EDAC_QCOM
For debugging issues having to do with stability and overall system
health, you should probably say 'Y' here.
+config EDAC_ASPEED
+ tristate "Aspeed AST 2500 SoC"
+ depends on MACH_ASPEED_G5
+ help
+ Support for error detection and correction on the Aspeed AST 2500 SoC.
+
+ First, ECC must be configured in the bootloader. Then, this driver
+ will expose error counters via the EDAC kernel framework.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 716096d08ea0..89ad4a84a0f6 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
-obj-$(CONFIG_EDAC_SKX) += skx_edac.o
obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
@@ -58,6 +57,12 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o
layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o
obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o
+skx_edac-y := skx_common.o skx_base.o
+obj-$(CONFIG_EDAC_SKX) += skx_edac.o
+
+i10nm_edac-y := skx_common.o i10nm_base.o
+obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o
+
obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o
obj-$(CONFIG_EDAC_CELL) += cell_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
@@ -78,3 +83,4 @@ obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
+obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index c89d82aa2776..1bcf9aea0cdf 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -29,6 +29,7 @@
#define EDAC_MOD_STR "altera_edac"
#define EDAC_DEVICE "Altera"
+#ifdef CONFIG_EDAC_ALTERA_SDRAM
static const struct altr_sdram_prv_data c5_data = {
.ecc_ctrl_offset = CV_CTLCFG_OFST,
.ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN,
@@ -468,6 +469,39 @@ static int altr_sdram_remove(struct platform_device *pdev)
return 0;
}
+/*
+ * If you want to suspend, need to disable EDAC by removing it
+ * from the device tree or defconfig.
+ */
+#ifdef CONFIG_PM
+static int altr_sdram_prepare(struct device *dev)
+{
+ pr_err("Suspend not allowed when EDAC is enabled.\n");
+
+ return -EPERM;
+}
+
+static const struct dev_pm_ops altr_sdram_pm_ops = {
+ .prepare = altr_sdram_prepare,
+};
+#endif
+
+static struct platform_driver altr_sdram_edac_driver = {
+ .probe = altr_sdram_probe,
+ .remove = altr_sdram_remove,
+ .driver = {
+ .name = "altr_sdram_edac",
+#ifdef CONFIG_PM
+ .pm = &altr_sdram_pm_ops,
+#endif
+ .of_match_table = altr_sdram_ctrl_of_match,
+ },
+};
+
+module_platform_driver(altr_sdram_edac_driver);
+
+#endif /* CONFIG_EDAC_ALTERA_SDRAM */
+
/**************** Stratix 10 EDAC Memory Controller Functions ************/
/**
@@ -530,37 +564,6 @@ static const struct regmap_config s10_sdram_regmap_cfg = {
/************** </Stratix10 EDAC Memory Controller Functions> ***********/
-/*
- * If you want to suspend, need to disable EDAC by removing it
- * from the device tree or defconfig.
- */
-#ifdef CONFIG_PM
-static int altr_sdram_prepare(struct device *dev)
-{
- pr_err("Suspend not allowed when EDAC is enabled.\n");
-
- return -EPERM;
-}
-
-static const struct dev_pm_ops altr_sdram_pm_ops = {
- .prepare = altr_sdram_prepare,
-};
-#endif
-
-static struct platform_driver altr_sdram_edac_driver = {
- .probe = altr_sdram_probe,
- .remove = altr_sdram_remove,
- .driver = {
- .name = "altr_sdram_edac",
-#ifdef CONFIG_PM
- .pm = &altr_sdram_pm_ops,
-#endif
- .of_match_table = altr_sdram_ctrl_of_match,
- },
-};
-
-module_platform_driver(altr_sdram_edac_driver);
-
/************************* EDAC Parent Probe *************************/
static const struct of_device_id altr_edac_device_of_match[];
@@ -1046,14 +1049,17 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
return -ENODEV;
}
- if (of_address_to_resource(sysmgr_np, 0, &res))
+ if (of_address_to_resource(sysmgr_np, 0, &res)) {
+ of_node_put(sysmgr_np);
return -ENOMEM;
+ }
/* Need physical address for SMCC call */
base = res.start;
ecc_mgr_map = regmap_init(NULL, NULL, (void *)base,
&s10_sdram_regmap_cfg);
+ of_node_put(sysmgr_np);
}
of_node_put(np_eccmgr);
if (IS_ERR(ecc_mgr_map)) {
@@ -2140,11 +2146,13 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_device_add(edac, child);
+#ifdef CONFIG_EDAC_ALTERA_SDRAM
else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) ||
(of_device_is_compatible(child, "altr,sdram-edac-s10")))
of_platform_populate(pdev->dev.of_node,
altr_sdram_ctrl_of_match,
NULL, &pdev->dev);
+#endif
}
return 0;
diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
new file mode 100644
index 000000000000..11833c0a5d07
--- /dev/null
+++ b/drivers/edac/aspeed_edac.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, 2019 Cisco Systems
+ */
+
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/regmap.h>
+#include "edac_module.h"
+
+
+#define DRV_NAME "aspeed-edac"
+
+
+#define ASPEED_MCR_PROT 0x00 /* protection key register */
+#define ASPEED_MCR_CONF 0x04 /* configuration register */
+#define ASPEED_MCR_INTR_CTRL 0x50 /* interrupt control/status register */
+#define ASPEED_MCR_ADDR_UNREC 0x58 /* address of first un-recoverable error */
+#define ASPEED_MCR_ADDR_REC 0x5c /* address of last recoverable error */
+#define ASPEED_MCR_LAST ASPEED_MCR_ADDR_REC
+
+
+#define ASPEED_MCR_PROT_PASSWD 0xfc600309
+#define ASPEED_MCR_CONF_DRAM_TYPE BIT(4)
+#define ASPEED_MCR_CONF_ECC BIT(7)
+#define ASPEED_MCR_INTR_CTRL_CLEAR BIT(31)
+#define ASPEED_MCR_INTR_CTRL_CNT_REC GENMASK(23, 16)
+#define ASPEED_MCR_INTR_CTRL_CNT_UNREC GENMASK(15, 12)
+#define ASPEED_MCR_INTR_CTRL_ENABLE (BIT(0) | BIT(1))
+
+
+static struct regmap *aspeed_regmap;
+
+
+/*
+ * regmap write hook. Each write is bracketed by a write of the password to
+ * the protection key register (enabling writes to the MCR register set) and
+ * a write of the inverted password (disabling them again). Always returns 0.
+ */
+static int regmap_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	void __iomem *base = (void __iomem *)context;
+	void __iomem *prot = base + ASPEED_MCR_PROT;
+
+	/* open the MCR register set for writing */
+	writel(ASPEED_MCR_PROT_PASSWD, prot);
+
+	writel(val, base + reg);
+
+	/* close it again */
+	writel(~ASPEED_MCR_PROT_PASSWD, prot);
+
+	return 0;
+}
+
+
+/*
+ * regmap read hook: fetch the register at offset @reg with readl() and
+ * store the value in *@val. Always returns 0.
+ */
+static int regmap_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	void __iomem *base = (void __iomem *)context;
+	u32 raw = readl(base + reg);
+
+	*val = raw;
+	return 0;
+}
+
+/*
+ * regmap volatile_reg hook: the protection key, the interrupt control/status
+ * register and the two error address registers are reported as volatile;
+ * every other register is not.
+ */
+static bool regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+	return reg == ASPEED_MCR_PROT ||
+	       reg == ASPEED_MCR_INTR_CTRL ||
+	       reg == ASPEED_MCR_ADDR_UNREC ||
+	       reg == ASPEED_MCR_ADDR_REC;
+}
+
+
+/*
+ * regmap description of the MCR register block: 32-bit register offsets and
+ * values at a stride of 4, up to ASPEED_MCR_LAST, accessed through the
+ * regmap_reg_read()/regmap_reg_write() hooks defined in this file.
+ */
+static const struct regmap_config aspeed_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = ASPEED_MCR_LAST,
+ .reg_write = regmap_reg_write,
+ .reg_read = regmap_reg_read,
+ .volatile_reg = regmap_is_volatile,
+ .fast_io = true,
+};
+
+
+/*
+ * Report @rec_cnt recoverable (corrected) errors to the EDAC core.
+ *
+ * Only the address of the last recoverable error (@rec_addr) is known, so
+ * all earlier errors are reported in one call without an address and the
+ * last one is reported with its page and offset. Does nothing if @rec_cnt
+ * is zero.
+ */
+static void count_rec(struct mem_ctl_info *mci, u8 rec_cnt, u32 rec_addr)
+{
+	struct csrow_info *row = mci->csrows[0];
+	u32 pfn, off;
+
+	if (rec_cnt == 0)
+		return;
+
+	/* all but the last error: page, offset and syndrome are unknown */
+	if (rec_cnt > 1)
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, rec_cnt - 1,
+				     0, 0, 0, 0, 0, -1,
+				     "address(es) not available", "");
+
+	/* last error: address is known, syndrome is not */
+	pfn = rec_addr >> PAGE_SHIFT;
+	off = rec_addr & ~PAGE_MASK;
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+			     row->first_page + pfn, off, 0,
+			     0, 0, -1, "", "");
+}
+
+
+/*
+ * Report @un_rec_cnt unrecoverable (uncorrected) errors to the EDAC core.
+ *
+ * Only the address of the first unrecoverable error (@un_rec_addr) is
+ * known, so the first error is reported with its page and offset and any
+ * further errors are reported in one call without an address. Does nothing
+ * if @un_rec_cnt is zero.
+ */
+static void count_un_rec(struct mem_ctl_info *mci, u8 un_rec_cnt,
+			 u32 un_rec_addr)
+{
+	struct csrow_info *row = mci->csrows[0];
+	u32 pfn, off;
+
+	if (un_rec_cnt == 0)
+		return;
+
+	/* first error: address is known, syndrome is not */
+	pfn = un_rec_addr >> PAGE_SHIFT;
+	off = un_rec_addr & ~PAGE_MASK;
+	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+			     row->first_page + pfn, off, 0,
+			     0, 0, -1, "", "");
+
+	if (un_rec_cnt == 1)
+		return;
+
+	/* remaining errors: page, offset and syndrome are unknown */
+	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, un_rec_cnt - 1,
+			     0, 0, 0, 0, 0, -1,
+			     "address(es) not available", "");
+}
+
+
+/*
+ * Interrupt handler for ECC events.
+ *
+ * Reads the error counters from the interrupt control/status register and
+ * the two error address registers, clears the interrupt flags and counters,
+ * then hands the counts to count_rec() and count_un_rec(). @arg is the
+ * mem_ctl_info registered with the interrupt. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mcr_isr(int irq, void *arg)
+{
+	struct mem_ctl_info *mci = arg;
+	u32 status, addr_unrec, addr_rec;
+	u8 rec_cnt, un_rec_cnt;
+
+	regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, &status);
+	dev_dbg(mci->pdev, "received edac interrupt w/ mcr register 50: 0x%x\n",
+		status);
+
+	/* split the status word into the two error counters */
+	rec_cnt = (status & ASPEED_MCR_INTR_CTRL_CNT_REC) >> 16;
+	un_rec_cnt = (status & ASPEED_MCR_INTR_CTRL_CNT_UNREC) >> 12;
+
+	dev_dbg(mci->pdev, "%d recoverable interrupts and %d unrecoverable interrupts\n",
+		rec_cnt, un_rec_cnt);
+
+	/* fetch both error addresses before the clear below */
+	regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_UNREC, &addr_unrec);
+	regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_REC, &addr_rec);
+
+	/* clear interrupt flags and error counters: set the bit, then drop it */
+	regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
+			   ASPEED_MCR_INTR_CTRL_CLEAR,
+			   ASPEED_MCR_INTR_CTRL_CLEAR);
+	regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
+			   ASPEED_MCR_INTR_CTRL_CLEAR, 0);
+
+	/* hand the collected counts to the EDAC core */
+	count_rec(mci, rec_cnt, addr_rec);
+	count_un_rec(mci, un_rec_cnt, addr_unrec);
+
+	if (!(rec_cnt || un_rec_cnt))
+		dev_dbg(mci->pdev, "received edac interrupt, but did not find any ECC counters\n");
+
+	regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, &status);
+	dev_dbg(mci->pdev, "edac interrupt handled. mcr reg 50 is now: 0x%x\n",
+		status);
+
+	return IRQ_HANDLED;
+}
+
+
+/*
+ * Request the device's first interrupt with mcr_isr() as handler and enable
+ * the ECC interrupts in the MCR interrupt control register.
+ *
+ * @ctx is passed to mcr_isr() as its argument.
+ * Returns 0 on success or a negative errno.
+ */
+static int config_irq(void *ctx, struct platform_device *pdev)
+{
+	int irq;
+	int rc;
+
+	/* register interrupt handler */
+	irq = platform_get_irq(pdev, 0);
+	dev_dbg(&pdev->dev, "got irq %d\n", irq);
+	/* platform_get_irq() reports failure as a negative errno, not as 0 */
+	if (irq < 0)
+		return irq;
+	/* keep rejecting 0 as before; it is not a usable interrupt number */
+	if (!irq)
+		return -ENODEV;
+
+	rc = devm_request_irq(&pdev->dev, irq, mcr_isr, IRQF_TRIGGER_HIGH,
+			      DRV_NAME, ctx);
+	if (rc) {
+		dev_err(&pdev->dev, "unable to request irq %d\n", irq);
+		return rc;
+	}
+
+	/* enable interrupts */
+	regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
+			   ASPEED_MCR_INTR_CTRL_ENABLE,
+			   ASPEED_MCR_INTR_CTRL_ENABLE);
+
+	return 0;
+}
+
+
+/*
+ * Fill in the single csrow/dimm of @mci.
+ *
+ * The physical memory range is taken from the first address entry of the
+ * "/memory" device tree node, and the DRAM type (DDR3 or DDR4) from the MCR
+ * configuration register.
+ *
+ * Returns 0 on success, -ENODEV if the "/memory" node is missing, or the
+ * error returned by of_address_to_resource().
+ */
+static int init_csrows(struct mem_ctl_info *mci)
+{
+	struct csrow_info *csrow = mci->csrows[0];
+	u32 nr_pages, dram_type;
+	struct dimm_info *dimm;
+	struct device_node *np;
+	struct resource r;
+	u32 reg04;
+	int rc;
+
+	/* retrieve info about physical memory from device tree */
+	np = of_find_node_by_path("/memory");
+	if (!np) {
+		dev_err(mci->pdev, "dt: missing /memory node\n");
+		return -ENODEV;
+	}
+
+	rc = of_address_to_resource(np, 0, &r);
+
+	/* the node is not needed past this point, whatever the result */
+	of_node_put(np);
+
+	if (rc) {
+		dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n");
+		return rc;
+	}
+
+	/*
+	 * %pR prints the whole resource; the width of resource_size_t
+	 * depends on the kernel configuration, so %x is not a safe match.
+	 */
+	dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
+		&r, PAGE_SHIFT);
+
+	csrow->first_page = r.start >> PAGE_SHIFT;
+	nr_pages = resource_size(&r) >> PAGE_SHIFT;
+	csrow->last_page = csrow->first_page + nr_pages - 1;
+
+	regmap_read(aspeed_regmap, ASPEED_MCR_CONF, &reg04);
+	dram_type = (reg04 & ASPEED_MCR_CONF_DRAM_TYPE) ? MEM_DDR4 : MEM_DDR3;
+
+	dimm = csrow->channels[0]->dimm;
+	dimm->mtype = dram_type;
+	dimm->edac_mode = EDAC_SECDED;
+	dimm->nr_pages = nr_pages / csrow->nr_channels;
+
+	dev_dbg(mci->pdev, "initialized dimm with first_page=0x%lx and nr_pages=0x%x\n",
+		csrow->first_page, nr_pages);
+
+	return 0;
+}
+
+
+/*
+ * Probe the memory controller.
+ *
+ * Maps the MCR registers, wraps them in a regmap, refuses to continue if ECC
+ * mode is not enabled in the configuration register, then allocates an
+ * mem_ctl_info with one chip-select and one channel, registers it with the
+ * EDAC core and finally sets up the interrupt.
+ *
+ * Returns 0 on success or a negative errno. The mci is freed on every
+ * failure after its allocation; it is also removed from the EDAC core if the
+ * failure happens after registration.
+ */
+static int aspeed_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct resource *res;
+	void __iomem *regs;
+	u32 reg04;
+	int rc;
+
+	/* setup regmap */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOENT;
+
+	regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	aspeed_regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
+					 &aspeed_regmap_config);
+	if (IS_ERR(aspeed_regmap))
+		return PTR_ERR(aspeed_regmap);
+
+	/* bail out if ECC mode is not configured */
+	regmap_read(aspeed_regmap, ASPEED_MCR_CONF, &reg04);
+	if (!(reg04 & ASPEED_MCR_CONF_ECC)) {
+		dev_err(&pdev->dev, "ECC mode is not configured in u-boot\n");
+		return -EPERM;
+	}
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	/* allocate & init EDAC MC data structure */
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+	mci->scrub_mode = SCRUB_HW_SRC;
+	mci->mod_name = DRV_NAME;
+	mci->ctl_name = "MIC";
+	mci->dev_name = dev_name(&pdev->dev);
+
+	rc = init_csrows(mci);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to init csrows\n");
+		goto probe_exit02;
+	}
+
+	platform_set_drvdata(pdev, mci);
+
+	/* register with edac core */
+	rc = edac_mc_add_mc(mci);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to register with EDAC core\n");
+		goto probe_exit02;
+	}
+
+	/* register interrupt handler and enable interrupts */
+	rc = config_irq(mci, pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "failed setting up irq\n");
+		goto probe_exit01;
+	}
+
+	return 0;
+
+probe_exit01:
+	/* undo edac_mc_add_mc() */
+	edac_mc_del_mc(&pdev->dev);
+probe_exit02:
+	/* undo edac_mc_alloc() */
+	edac_mc_free(mci);
+	return rc;
+}
+
+
+/*
+ * Remove the device: turn the ECC interrupts off in the interrupt control
+ * register, then unregister the mci from the EDAC core and free it if one
+ * was registered. Always returns 0.
+ */
+static int aspeed_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+
+	/* disable interrupts before tearing anything down */
+	regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
+			   ASPEED_MCR_INTR_CTRL_ENABLE, 0);
+
+	mci = edac_mc_del_mc(&pdev->dev);
+	if (!mci)
+		return 0;
+
+	edac_mc_free(mci);
+	return 0;
+}
+
+
+/*
+ * Device tree compatible strings this driver matches; the empty entry
+ * terminates the table.
+ */
+static const struct of_device_id aspeed_of_match[] = {
+ { .compatible = "aspeed,ast2500-sdram-edac" },
+ {},
+};
+
+
+/*
+ * Platform driver description: matched through aspeed_of_match, with
+ * aspeed_probe()/aspeed_remove() as the bind/unbind callbacks.
+ */
+static struct platform_driver aspeed_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = aspeed_of_match
+ },
+ .probe = aspeed_probe,
+ .remove = aspeed_remove
+};
+
+
+/*
+ * Module init/exit do nothing but register/unregister the platform driver,
+ * so let module_platform_driver() generate them.
+ */
+module_platform_driver(aspeed_driver);
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stefan Schaeckeler <sschaeck@cisco.com>");
+MODULE_DESCRIPTION("Aspeed AST2500 EDAC driver");
+MODULE_VERSION("1.0");
diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c
index 92dbb7e2320c..0a9277228c50 100644
--- a/drivers/edac/debugfs.c
+++ b/drivers/edac/debugfs.c
@@ -41,14 +41,9 @@ static const struct file_operations debug_fake_inject_fops = {
.llseek = generic_file_llseek,
};
-int __init edac_debugfs_init(void)
+void __init edac_debugfs_init(void)
{
edac_debugfs = debugfs_create_dir("edac", NULL);
- if (IS_ERR(edac_debugfs)) {
- edac_debugfs = NULL;
- return -ENOMEM;
- }
- return 0;
}
void edac_debugfs_exit(void)
@@ -56,50 +51,31 @@ void edac_debugfs_exit(void)
debugfs_remove_recursive(edac_debugfs);
}
-int edac_create_debugfs_nodes(struct mem_ctl_info *mci)
+void edac_create_debugfs_nodes(struct mem_ctl_info *mci)
{
- struct dentry *d, *parent;
+ struct dentry *parent;
char name[80];
int i;
- if (!edac_debugfs)
- return -ENODEV;
-
- d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs);
- if (!d)
- return -ENOMEM;
- parent = d;
+ parent = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs);
for (i = 0; i < mci->n_layers; i++) {
sprintf(name, "fake_inject_%s",
edac_layer_name[mci->layers[i].type]);
- d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent,
- &mci->fake_inject_layer[i]);
- if (!d)
- goto nomem;
+ debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_layer[i]);
}
- d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent,
- &mci->fake_inject_ue);
- if (!d)
- goto nomem;
+ debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_ue);
- d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent,
- &mci->fake_inject_count);
- if (!d)
- goto nomem;
+ debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent,
+ &mci->fake_inject_count);
- d = debugfs_create_file("fake_inject", S_IWUSR, parent,
- &mci->dev,
- &debug_fake_inject_fops);
- if (!d)
- goto nomem;
+ debugfs_create_file("fake_inject", S_IWUSR, parent, &mci->dev,
+ &debug_fake_inject_fops);
mci->debugfs = parent;
- return 0;
-nomem:
- edac_debugfs_remove_recursive(mci->debugfs);
- return -ENOMEM;
}
/* Create a toplevel dir under EDAC's debugfs hierarchy */
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index dec88dcea036..dd7d0b509aa3 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -69,9 +69,9 @@ extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
#define edac_debugfs_remove_recursive debugfs_remove_recursive
#define edac_debugfs_remove debugfs_remove
#ifdef CONFIG_EDAC_DEBUG
-int edac_debugfs_init(void);
+void edac_debugfs_init(void);
void edac_debugfs_exit(void);
-int edac_create_debugfs_nodes(struct mem_ctl_info *mci);
+void edac_create_debugfs_nodes(struct mem_ctl_info *mci);
struct dentry *edac_debugfs_create_dir(const char *dirname);
struct dentry *
edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent);
@@ -83,9 +83,9 @@ edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8
struct dentry *
edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value);
#else
-static inline int edac_debugfs_init(void) { return -ENODEV; }
+static inline void edac_debugfs_init(void) { }
static inline void edac_debugfs_exit(void) { }
-static inline int edac_create_debugfs_nodes(struct mem_ctl_info *mci) { return 0; }
+static inline void edac_create_debugfs_nodes(struct mem_ctl_info *mci) { }
static inline struct dentry *edac_debugfs_create_dir(const char *dirname) { return NULL; }
static inline struct dentry *
edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) { return NULL; }
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
new file mode 100644
index 000000000000..c334fb7c63df
--- /dev/null
+++ b/drivers/edac/i10nm_base.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Intel(R) 10nm server memory controller.
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/mce.h>
+#include "edac_module.h"
+#include "skx_common.h"
+
+#define I10NM_REVISION "v0.0.3"
+#define EDAC_MOD_STR "i10nm_edac"
+
+/* Debug macros */
+#define i10nm_printk(level, fmt, arg...) \
+ edac_printk(level, "i10nm", fmt, ##arg)
+
+#define I10NM_GET_SCK_BAR(d, reg) \
+ pci_read_config_dword((d)->uracu, 0xd0, &(reg))
+#define I10NM_GET_IMC_BAR(d, i, reg) \
+ pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_DIMMMTR(m, i, j) \
+ (*(u32 *)((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4))
+#define I10NM_GET_MCDDRTCFG(m, i, j) \
+ (*(u32 *)((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4))
+
+#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
+#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
+#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
+ GET_BITFIELD(reg, 0, 10) + 1) << 12)
+
+static struct list_head *i10nm_edac_list;
+
+/*
+ * Look up the PCI device at @dom:@bus:@dev.@fun and enable it.
+ *
+ * Returns the device with one reference held (the one taken by
+ * pci_get_domain_bus_and_slot()), or NULL if the device does not exist or
+ * cannot be enabled. The caller owns that reference.
+ */
+static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
+					   unsigned int dev, unsigned int fun)
+{
+	struct pci_dev *pdev;
+
+	pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
+	if (!pdev) {
+		edac_dbg(2, "No device %02x:%02x.%x\n",
+			 bus, dev, fun);
+		return NULL;
+	}
+
+	if (unlikely(pci_enable_device(pdev) < 0)) {
+		edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
+			 bus, dev, fun);
+		/* drop the lookup reference; the device is not handed out */
+		pci_dev_put(pdev);
+		return NULL;
+	}
+
+	/*
+	 * No extra pci_dev_get() here: the lookup above already returned a
+	 * referenced device, and a second reference would never be dropped.
+	 */
+	return pdev;
+}
+
+/*
+ * For every socket on i10nm_edac_list, look up the PCI devices the driver
+ * needs (util_all, uracu and one device per memory controller) and ioremap
+ * each memory controller's MMIO window, whose location is computed from the
+ * socket and per-controller BAR registers read through uracu.
+ *
+ * Only the first memory controller of a socket is mandatory; the others are
+ * skipped if their PCI device is absent.
+ *
+ * Returns 0 on success or -ENODEV on failure. Nothing is unwound here;
+ * i10nm_init() calls skx_remove() when this function fails.
+ */
+static int i10nm_get_all_munits(void)
+{
+	struct pci_dev *mdev;
+	void __iomem *mbase;
+	unsigned long size;
+	struct skx_dev *d;
+	int i, j = 0;
+	u32 reg, off;
+	u64 base;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
+		if (!d->util_all)
+			return -ENODEV;
+
+		d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
+		if (!d->uracu)
+			return -ENODEV;
+
+		if (I10NM_GET_SCK_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+			return -ENODEV;
+		}
+
+		base = I10NM_GET_SCK_MMIO_BASE(reg);
+		/* j only numbers the sockets for this debug message */
+		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
+			 j++, base, reg);
+
+		for (i = 0; i < I10NM_NUM_IMC; i++) {
+			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+						   12 + i, 0);
+			if (i == 0 && !mdev) {
+				i10nm_printk(KERN_ERR, "No IMC found\n");
+				return -ENODEV;
+			}
+			if (!mdev)
+				continue;
+
+			d->imc[i].mdev = mdev;
+
+			if (I10NM_GET_IMC_BAR(d, i, reg)) {
+				i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
+				return -ENODEV;
+			}
+
+			off = I10NM_GET_IMC_MMIO_OFFSET(reg);
+			size = I10NM_GET_IMC_MMIO_SIZE(reg);
+			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
+				 i, base + off, size, reg);
+
+			mbase = ioremap(base + off, size);
+			if (!mbase) {
+				i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
+					     base + off);
+				return -ENODEV;
+			}
+
+			d->imc[i].mbase = mbase;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * CPU models this driver supports; i10nm_init() gives up with -ENODEV when
+ * x86_match_cpu() finds no match in this table.
+ */
+static const struct x86_cpu_id i10nm_cpuids[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_X, 0, 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
+
+/*
+ * Read the MCMTR register of channel @chan from the controller's MMIO window
+ * and return whether bit 2 is set. The caller treats a false result as
+ * "ECC is disabled".
+ */
+static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
+{
+	u32 mcmtr = *(u32 *)(imc->mbase + 0x20ef8 + chan * 0x4000);
+
+	edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
+
+	return GET_BITFIELD(mcmtr, 2, 2) != 0;
+}
+
+/*
+ * Populate the dimm_info entries of @mci from the memory controller's
+ * DIMMMTR and MCDDRTCFG registers, one channel at a time.
+ *
+ * Returns 0 on success (including when the controller has no MMIO mapping,
+ * in which case nothing is probed), or -ENODEV if a channel has DIMMs but
+ * ECC is disabled on that channel.
+ */
+static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct skx_pvt *pvt = mci->pvt_info;
+	struct skx_imc *imc = pvt->imc;
+	struct dimm_info *dimm;
+	u32 mtr, mcddrtcfg;
+	int i, j, ndimms;
+
+	/* loop-invariant: without a mapping no channel can be read */
+	if (!imc->mbase)
+		return 0;
+
+	for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+		ndimms = 0;
+		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+					     mci->n_layers, i, j, 0);
+			mtr = I10NM_GET_DIMMMTR(imc, i, j);
+			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
+			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
+				 mtr, mcddrtcfg, imc->mc, i, j);
+
+			if (IS_DIMM_PRESENT(mtr))
+				ndimms += skx_get_dimm_info(mtr, 0, dimm,
+							    imc, i, j);
+			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
+				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
+							      EDAC_MOD_STR);
+		}
+		/* check the channel just scanned, not always channel 0 */
+		if (ndimms && !i10nm_check_ecc(imc, i)) {
+			i10nm_printk(KERN_ERR, "ECC is disabled on imc %d\n",
+				     imc->mc);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Notifier that routes machine check events to skx_mce_check_error();
+ * added to the MCE decode chain in i10nm_init() and removed in i10nm_exit().
+ */
+static struct notifier_block i10nm_mce_dec = {
+ .notifier_call = skx_mce_check_error,
+ .priority = MCE_PRIO_EDAC,
+};
+
+/*
+ * Module init.
+ *
+ * Refuses to load if a different EDAC owner string is already set (-EBUSY)
+ * or the CPU is not listed in i10nm_cpuids (-ENODEV). Otherwise it collects
+ * the bus mappings and per-socket devices, registers one MCI for every
+ * memory controller whose PCI device was found, and finally hooks
+ * i10nm_mce_dec into the MCE decode chain.
+ *
+ * Returns 0 on success or a negative errno. Failures that jump to the
+ * "fail" label call skx_remove() before returning.
+ */
+static int __init i10nm_init(void)
+{
+ u8 mc = 0, src_id = 0, node_id = 0;
+ const struct x86_cpu_id *id;
+ const char *owner;
+ struct skx_dev *d;
+ int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
+ u64 tolm, tohm;
+
+ edac_dbg(2, "\n");
+
+ /* an owner string other than ours is already set: do not load */
+ owner = edac_get_owner();
+ if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
+ return -EBUSY;
+
+ id = x86_match_cpu(i10nm_cpuids);
+ if (!id)
+ return -ENODEV;
+
+ /* NOTE(review): tolm/tohm are filled in here but not read again in this function */
+ rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
+ if (rc)
+ return rc;
+
+ rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list);
+ if (rc < 0)
+ goto fail;
+ /* NOTE(review): unlike the other failures, this path skips skx_remove() - confirm nothing needs unwinding here */
+ if (rc == 0) {
+ i10nm_printk(KERN_ERR, "No memory controllers found\n");
+ return -ENODEV;
+ }
+
+ rc = i10nm_get_all_munits();
+ if (rc < 0)
+ goto fail;
+
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ rc = skx_get_src_id(d, &src_id);
+ if (rc < 0)
+ goto fail;
+
+ rc = skx_get_node_id(d, &node_id);
+ if (rc < 0)
+ goto fail;
+
+ edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+ for (i = 0; i < I10NM_NUM_IMC; i++) {
+ /* controllers without a PCI device were skipped in i10nm_get_all_munits() */
+ if (!d->imc[i].mdev)
+ continue;
+
+ /* mc counts controllers across all sockets; lmc is the index within this socket */
+ d->imc[i].mc = mc++;
+ d->imc[i].lmc = i;
+ d->imc[i].src_id = src_id;
+ d->imc[i].node_id = node_id;
+
+ rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
+ "Intel_10nm Socket", EDAC_MOD_STR,
+ i10nm_get_dimm_config);
+ if (rc < 0)
+ goto fail;
+ }
+ }
+
+ rc = skx_adxl_get();
+ if (rc)
+ goto fail;
+
+ opstate_init();
+ mce_register_decode_chain(&i10nm_mce_dec);
+ setup_skx_debug("i10nm_test");
+
+ i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
+
+ return 0;
+fail:
+ skx_remove();
+ return rc;
+}
+
+/*
+ * Module exit: undo the tail of i10nm_init() in reverse order - debug
+ * setup, MCE decode chain registration, skx_adxl_get() - then call
+ * skx_remove().
+ */
+static void __exit i10nm_exit(void)
+{
+ edac_dbg(2, "\n");
+ teardown_skx_debug();
+ mce_unregister_decode_chain(&i10nm_mce_dec);
+ skx_adxl_put();
+ skx_remove();
+}
+
+module_init(i10nm_init);
+module_exit(i10nm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
new file mode 100644
index 000000000000..adae4c848ca1
--- /dev/null
+++ b/drivers/edac/skx_base.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * EDAC driver for Intel(R) Xeon(R) Skylake processors
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/processor.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/mce.h>
+
+#include "edac_module.h"
+#include "skx_common.h"
+
+#define EDAC_MOD_STR "skx_edac"
+
+/*
+ * Debug macros
+ */
+#define skx_printk(level, fmt, arg...) \
+ edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...) \
+ edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+static struct list_head *skx_edac_list;
+
+static u64 skx_tolm, skx_tohm;
+static int skx_num_sockets;
+static unsigned int nvdimm_count;
+
+#define MASK26 0x3FFFFFF /* Mask for 2^26 */
+#define MASK29 0x1FFFFFFF /* Mask for 2^29 */
+
+/*
+ * Find the skx_dev on skx_edac_list whose PCI segment matches that of @bus
+ * and whose bus[@idx] entry equals the bus number of @bus.
+ * Returns NULL when no entry matches.
+ */
+static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
+{
+	struct skx_dev *dev;
+
+	list_for_each_entry(dev, skx_edac_list, list) {
+		if (dev->seg != pci_domain_nr(bus))
+			continue;
+		if (dev->bus[idx] != bus->number)
+			continue;
+
+		return dev;
+	}
+
+	return NULL;
+}
+
+enum munittype {
+ CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+};
+
+struct munit {
+ u16 did; /* PCI device id that get_all_munits() searches for */
+ u16 devfn[SKX_NUM_IMC]; /* devfn expected for the device of each memory controller */
+ u8 busidx; /* index into skx_dev.bus[] used to find the owning socket */
+ u8 per_socket; /* number of these devices skx_init() expects per socket */
+ enum munittype mtype; /* selects how get_all_munits() records the device */
+};
+
+/*
+ * List of PCI device ids that we need together with some device
+ * number and function numbers to tell which memory controller the
+ * device belongs to.
+ */
+static const struct munit skx_all_munits[] = {
+ { 0x2054, { }, 1, 1, SAD_ALL },
+ { 0x2055, { }, 1, 1, UTIL_ALL },
+ { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
+ { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
+ { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+ { 0x208e, { }, 1, 0, SAD },
+ { }
+};
+
+/*
+ * Walk every Intel PCI device with id m->did and attach it to the skx_dev
+ * of the socket it sits on, according to m->mtype.
+ *
+ * Returns the number of devices recorded (SAD devices are only read for
+ * their route table and are not counted), or -ENODEV when a device cannot
+ * be matched to a memory controller or socket, cannot be enabled, or
+ * reports a route table that disagrees with an earlier one.
+ */
+static int get_all_munits(const struct munit *m)
+{
+ struct pci_dev *pdev, *prev;
+ struct skx_dev *d;
+ u32 reg;
+ int i = 0, ndev = 0;
+
+ prev = NULL;
+ for (;;) {
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
+ if (!pdev)
+ break;
+ ndev++;
+ /* One device per memory controller: its devfn tells which one (i). */
+ if (m->per_socket == SKX_NUM_IMC) {
+ for (i = 0; i < SKX_NUM_IMC; i++)
+ if (m->devfn[i] == pdev->devfn)
+ break;
+ if (i == SKX_NUM_IMC)
+ goto fail;
+ }
+ d = get_skx_dev(pdev->bus, m->busidx);
+ if (!d)
+ goto fail;
+
+ /* Be sure that the device is enabled */
+ if (unlikely(pci_enable_device(pdev) < 0)) {
+ skx_printk(KERN_ERR, "Couldn't enable device %04x:%04x\n",
+ PCI_VENDOR_ID_INTEL, m->did);
+ goto fail;
+ }
+
+ /*
+ * Every pointer stored in *d gets its own reference via
+ * pci_dev_get(); skx_remove() drops it with pci_dev_put().
+ */
+ switch (m->mtype) {
+ case CHAN0: case CHAN1: case CHAN2:
+ pci_dev_get(pdev);
+ d->imc[i].chan[m->mtype].cdev = pdev;
+ break;
+ case SAD_ALL:
+ pci_dev_get(pdev);
+ d->sad_all = pdev;
+ break;
+ case UTIL_ALL:
+ pci_dev_get(pdev);
+ d->util_all = pdev;
+ break;
+ case SAD:
+ /*
+ * one of these devices per core, including cores
+ * that don't exist on this SKU. Ignore any that
+ * read a route table of zero, make sure all the
+ * non-zero values match.
+ */
+ pci_read_config_dword(pdev, 0xB4, &reg);
+ if (reg != 0) {
+ if (d->mcroute == 0) {
+ d->mcroute = reg;
+ } else if (d->mcroute != reg) {
+ skx_printk(KERN_ERR, "mcroute mismatch\n");
+ goto fail;
+ }
+ }
+ ndev--; /* not stored anywhere, so not counted */
+ break;
+ }
+
+ prev = pdev; /* continue the search after this device */
+ }
+
+ return ndev;
+fail:
+ pci_dev_put(pdev); /* drop the reference held on the current device */
+ return -ENODEV;
+}
+
+static const struct x86_cpu_id skx_cpuids[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
+
+#define SKX_GET_MTMTR(dev, reg) \
+ pci_read_config_dword((dev), 0x87c, &(reg))
+
+/*
+ * Read the MTMTR register of @pdev and report whether bit 2 is set.
+ * The caller treats a clear bit as "ECC is disabled".
+ */
+static bool skx_check_ecc(struct pci_dev *pdev)
+{
+	u32 reg;
+
+	SKX_GET_MTMTR(pdev, reg);
+	if (GET_BITFIELD(reg, 2, 2))
+		return true;
+
+	return false;
+}
+
+/*
+ * Fill in the EDAC dimm_info entries of @mci from the per-channel
+ * registers of its memory controller, counting NVDIMMs in nvdimm_count.
+ * Returns -ENODEV as soon as a channel has DIMMs while the ECC check on
+ * channel 0 of the controller fails, 0 otherwise.
+ */
+static int skx_get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct skx_pvt *pvt = mci->pvt_info;
+	struct skx_imc *imc = pvt->imc;
+	u32 mtr, amap, mcddrtcfg;
+	struct pci_dev *cdev;
+	struct dimm_info *dimm;
+	int chan, slot, found;
+
+	for (chan = 0; chan < SKX_NUM_CHANNELS; chan++) {
+		cdev = imc->chan[chan].cdev;
+		found = 0;
+
+		pci_read_config_dword(cdev, 0x8C, &amap);
+		pci_read_config_dword(cdev, 0x400, &mcddrtcfg);
+
+		for (slot = 0; slot < SKX_NUM_DIMMS; slot++) {
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+					     mci->n_layers, chan, slot, 0);
+			pci_read_config_dword(cdev, 0x80 + 4 * slot, &mtr);
+
+			/* A regular DIMM takes precedence over an NVDIMM. */
+			if (IS_DIMM_PRESENT(mtr)) {
+				found += skx_get_dimm_info(mtr, amap, dimm,
+							   imc, chan, slot);
+				continue;
+			}
+
+			if (IS_NVDIMM_PRESENT(mcddrtcfg, slot)) {
+				found += skx_get_nvdimm_info(dimm, imc, chan, slot,
+							     EDAC_MOD_STR);
+				nvdimm_count++;
+			}
+		}
+
+		if (found && !skx_check_ecc(imc->chan[0].cdev)) {
+			skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+#define SKX_MAX_SAD 24
+
+#define SKX_GET_SAD(d, i, reg) \
+ pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &(reg))
+#define SKX_GET_ILV(d, i, reg) \
+ pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &(reg))
+
+#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31)
+#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27)
+#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
+#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
+#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4)
+#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
+#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0)
+
+#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0)
+#define SKX_ILV_TARGET(tgt) ((tgt) & 7)
+
+/*
+ * First decode stage: search the SAD registers for the range containing
+ * res->addr, follow at most one redirection to another socket, and fill
+ * in res->dev, res->socket, res->imc and res->channel.
+ * Returns false when the address cannot be decoded.
+ */
+static bool skx_sad_decode(struct decoded_addr *res)
+{
+ struct skx_dev *d = list_first_entry(skx_edac_list, typeof(*d), list);
+ u64 addr = res->addr;
+ int i, idx, tgt, lchan, shift;
+ u32 sad, ilv;
+ u64 limit, prev_limit;
+ int remote = 0;
+
+ /* Simple sanity check for I/O space or out of range */
+ if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
+ edac_dbg(0, "Address 0x%llx out of range\n", addr);
+ return false;
+ }
+
+restart:
+ /* Each enabled entry i covers [limit of entry i-1 + 1, limit of entry i]. */
+ prev_limit = 0;
+ for (i = 0; i < SKX_MAX_SAD; i++) {
+ SKX_GET_SAD(d, i, sad);
+ limit = SKX_SAD_LIMIT(sad);
+ if (SKX_SAD_ENABLE(sad)) {
+ if (addr >= prev_limit && addr <= limit)
+ goto sad_found;
+ }
+ prev_limit = limit + 1;
+ }
+ edac_dbg(0, "No SAD entry for 0x%llx\n", addr);
+ return false;
+
+sad_found:
+ SKX_GET_ILV(d, i, ilv);
+
+ /* The interleave mode selects which three address bits index the target list. */
+ switch (SKX_SAD_INTERLEAVE(sad)) {
+ case 0:
+ idx = GET_BITFIELD(addr, 6, 8);
+ break;
+ case 1:
+ idx = GET_BITFIELD(addr, 8, 10);
+ break;
+ case 2:
+ idx = GET_BITFIELD(addr, 12, 14);
+ break;
+ case 3:
+ idx = GET_BITFIELD(addr, 30, 32);
+ break;
+ }
+
+ /* Targets are packed as 4-bit fields in the ILV register. */
+ tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
+
+ /* If point to another node, find it and start over */
+ if (SKX_ILV_REMOTE(tgt)) {
+ if (remote) {
+ edac_dbg(0, "Double remote!\n");
+ return false;
+ }
+ remote = 1;
+ list_for_each_entry(d, skx_edac_list, list) {
+ if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
+ goto restart;
+ }
+ edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
+ return false;
+ }
+
+ /* Derive the logical channel used to index d->mcroute below. */
+ if (SKX_SAD_MOD3(sad) == 0) {
+ lchan = SKX_ILV_TARGET(tgt);
+ } else {
+ switch (SKX_SAD_MOD3MODE(sad)) {
+ case 0:
+ shift = 6;
+ break;
+ case 1:
+ shift = 8;
+ break;
+ case 2:
+ shift = 12;
+ break;
+ default:
+ edac_dbg(0, "illegal mod3mode\n");
+ return false;
+ }
+ switch (SKX_SAD_MOD3ASMOD2(sad)) {
+ case 0:
+ lchan = (addr >> shift) % 3;
+ break;
+ case 1:
+ lchan = (addr >> shift) % 2;
+ break;
+ case 2:
+ lchan = (addr >> shift) % 2;
+ lchan = (lchan << 1) | !lchan;
+ break;
+ case 3:
+ lchan = ((addr >> shift) % 2) << 1;
+ break;
+ }
+ lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
+ }
+
+ /* mcroute holds a 3-bit imc per lchan from bit 0 and a 2-bit channel per lchan from bit 18. */
+ res->dev = d;
+ res->socket = d->imc[0].src_id;
+ res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
+ res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
+
+ edac_dbg(2, "0x%llx: socket=%d imc=%d channel=%d\n",
+ res->addr, res->socket, res->imc, res->channel);
+ return true;
+}
+
+#define SKX_MAX_TAD 8
+
+#define SKX_GET_TADBASE(d, mc, i, reg) \
+ pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &(reg))
+#define SKX_GET_TADWAYNESS(d, mc, i, reg) \
+ pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &(reg))
+#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \
+ pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &(reg))
+
+#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26)
+#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5)
+#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7)
+#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
+#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26)
+#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11))
+#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1)
+
+/* which bit used for both socket and channel interleave */
+static int skx_granularity[] = { 6, 8, 12, 30 };
+
+/*
+ * Undo one level of interleaving: the part of @addr at and above bit
+ * @shift is divided by @ways, and the bits below @shift are taken from
+ * @lowbits.
+ */
+static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
+{
+	const u64 low_mask = (1ull << shift) - 1;
+	u64 upper = (addr >> shift) / ways;
+
+	return (upper << shift) | (lowbits & low_mask);
+}
+
+/*
+ * Second decode stage: find the TAD range of the memory controller chosen
+ * by skx_sad_decode() that contains res->addr, then turn the system
+ * address into a channel address (res->chan_addr) by subtracting the
+ * channel offset and removing socket and channel interleaving.
+ * Also records res->sktways and res->chanways.
+ * Returns false when no TAD range matches.
+ */
+static bool skx_tad_decode(struct decoded_addr *res)
+{
+ int i;
+ u32 base, wayness, chnilvoffset;
+ int skt_interleave_bit, chn_interleave_bit;
+ u64 channel_addr;
+
+ /* The range start comes from the base register, its end from the wayness register. */
+ for (i = 0; i < SKX_MAX_TAD; i++) {
+ SKX_GET_TADBASE(res->dev, res->imc, i, base);
+ SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
+ if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
+ goto tad_found;
+ }
+ edac_dbg(0, "No TAD entry for 0x%llx\n", res->addr);
+ return false;
+
+tad_found:
+ res->sktways = SKX_TAD_SKTWAYS(wayness);
+ res->chanways = SKX_TAD_CHNWAYS(wayness);
+ skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
+ chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
+
+ /* The offset is read from the channel's own device, for the matching TAD index. */
+ SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
+ channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
+
+ if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
+ /* Must handle channel first, then socket */
+ channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+ res->chanways, channel_addr);
+ channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+ res->sktways, channel_addr);
+ } else {
+ /* Handle socket then channel. Preserve low bits from original address */
+ channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+ res->sktways, res->addr);
+ channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+ res->chanways, res->addr);
+ }
+
+ res->chan_addr = channel_addr;
+
+ edac_dbg(2, "0x%llx: chan_addr=0x%llx sktways=%d chanways=%d\n",
+ res->addr, res->chan_addr, res->sktways, res->chanways);
+ return true;
+}
+
+#define SKX_MAX_RIR 4
+
+#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \
+ pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \
+ 0x108 + 4 * (i), &(reg))
+#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \
+ pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \
+ 0x120 + 16 * (idx) + 4 * (i), &(reg))
+
+#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
+#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
+#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
+#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
+#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
+
+/*
+ * Third decode stage: find the RIR range of the channel that contains
+ * res->chan_addr and derive res->rank_address, res->channel_rank,
+ * res->dimm and res->rank from it.
+ * Returns false when no valid RIR range matches.
+ */
+static bool skx_rir_decode(struct decoded_addr *res)
+{
+ int i, idx, chan_rank;
+ int shift;
+ u32 rirway, rirlv;
+ u64 rank_addr, prev_limit = 0, limit;
+
+ /* The rank interleave granularity follows the page mode of DIMM 0 on the channel. */
+ if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
+ shift = 6;
+ else
+ shift = 13;
+
+ for (i = 0; i < SKX_MAX_RIR; i++) {
+ SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
+ limit = SKX_RIR_LIMIT(rirway);
+ if (SKX_RIR_VALID(rirway)) {
+ if (prev_limit <= res->chan_addr &&
+ res->chan_addr <= limit)
+ goto rir_found;
+ }
+ prev_limit = limit;
+ }
+ edac_dbg(0, "No RIR entry for 0x%llx\n", res->addr);
+ return false;
+
+rir_found:
+ /* Divide out the rank interleave above bit 'shift', keep the bits below it. */
+ rank_addr = res->chan_addr >> shift;
+ rank_addr /= SKX_RIR_WAYS(rirway);
+ rank_addr <<= shift;
+ rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
+
+ res->rank_address = rank_addr; /* overwritten below once the offset is known */
+ idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
+
+ SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
+ res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
+ chan_rank = SKX_RIR_CHAN_RANK(rirlv);
+ res->channel_rank = chan_rank;
+ /* The channel rank is split as dimm * 4 + rank. */
+ res->dimm = chan_rank / 4;
+ res->rank = chan_rank % 4;
+
+ edac_dbg(2, "0x%llx: dimm=%d rank=%d chan_rank=%d rank_addr=0x%llx\n",
+ res->addr, res->dimm, res->rank,
+ res->channel_rank, res->rank_address);
+ return true;
+}
+
+static u8 skx_close_row[] = {
+ 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
+};
+
+static u8 skx_close_column[] = {
+ 3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+
+static u8 skx_open_row[] = {
+ 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
+};
+
+static u8 skx_open_column[] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+
+static u8 skx_open_fine_column[] = {
+ 3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+/*
+ * Gather @nbits scattered bits of @addr into a compact value: bit n of
+ * the result is bit bits[n] of @addr.
+ */
+static int skx_bits(u64 addr, int nbits, u8 *bits)
+{
+	int out = 0;
+	int n;
+
+	for (n = 0; n < nbits; n++) {
+		u64 bit = (addr >> bits[n]) & 1;
+
+		out |= bit << n;
+	}
+
+	return out;
+}
+
+/*
+ * Build a two-bit value from address bits @b0 (low) and @b1 (high).
+ * When @do_xor is set, XOR it with the two-bit value built the same way
+ * from bits @x0 and @x1.
+ */
+static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+	int bank = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
+	int hash;
+
+	if (!do_xor)
+		return bank;
+
+	hash = GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);
+
+	return bank ^ hash;
+}
+
+/*
+ * Final decode stage: split r->rank_address into row, column, bank address
+ * and bank group, using the bit maps that match the page mode recorded
+ * for the DIMM found by skx_rir_decode(). Always returns true.
+ */
+static bool skx_mad_decode(struct decoded_addr *r)
+{
+ struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
+ int bg0 = dimm->fine_grain_bank ? 6 : 13; /* low bank-group bit in open page mode */
+
+ if (dimm->close_pg) {
+ r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
+ r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
+ r->column |= 0x400; /* C10 is autoprecharge, always set */
+ r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
+ r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
+ } else {
+ r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
+ if (dimm->fine_grain_bank)
+ r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
+ else
+ r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
+ r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
+ r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
+ }
+ r->row &= (1u << dimm->rowbits) - 1; /* keep only the low rowbits bits */
+
+ edac_dbg(2, "0x%llx: row=0x%x col=0x%x bank_addr=%d bank_group=%d\n",
+ r->addr, r->row, r->column, r->bank_address,
+ r->bank_group);
+ return true;
+}
+
+/*
+ * Run the four decode stages in order on @res, stopping at the first one
+ * that fails. Returns true only when every stage succeeded.
+ */
+static bool skx_decode(struct decoded_addr *res)
+{
+	if (!skx_sad_decode(res))
+		return false;
+	if (!skx_tad_decode(res))
+		return false;
+	if (!skx_rir_decode(res))
+		return false;
+
+	return skx_mad_decode(res);
+}
+
+static struct notifier_block skx_mce_dec = {
+ .notifier_call = skx_mce_check_error,
+ .priority = MCE_PRIO_EDAC,
+};
+
+/*
+ * skx_init:
+ * make sure we are running on the correct cpu model
+ * search for all the devices we need
+ * check which DIMMs are present.
+ *
+ * Returns 0 on success. Returns -EBUSY when another EDAC module already
+ * owns the hardware, -ENODEV when the CPU or the expected PCI devices are
+ * not found, or the error code of the helper that failed. Once the
+ * per-socket list has been built, every failure goes through skx_remove().
+ */
+static int __init skx_init(void)
+{
+ const struct x86_cpu_id *id;
+ const struct munit *m;
+ const char *owner;
+ int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; /* config offsets handed to skx_get_hi_lo() */
+ u8 mc = 0, src_id, node_id;
+ struct skx_dev *d;
+
+ edac_dbg(2, "\n");
+
+ owner = edac_get_owner();
+ if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
+ return -EBUSY;
+
+ id = x86_match_cpu(skx_cpuids);
+ if (!id)
+ return -ENODEV;
+
+ rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
+ if (rc)
+ return rc;
+
+ /* A positive return value is the number of sockets found. */
+ rc = skx_get_all_bus_mappings(0x2016, 0xcc, SKX, &skx_edac_list);
+ if (rc < 0)
+ goto fail;
+ if (rc == 0) {
+ edac_dbg(2, "No memory controllers found\n");
+ return -ENODEV;
+ }
+ skx_num_sockets = rc;
+
+ /* Every device type must show up exactly per_socket times on each socket. */
+ for (m = skx_all_munits; m->did; m++) {
+ rc = get_all_munits(m);
+ if (rc < 0)
+ goto fail;
+ if (rc != m->per_socket * skx_num_sockets) {
+ edac_dbg(2, "Expected %d, got %d of 0x%x\n",
+ m->per_socket * skx_num_sockets, rc, m->did);
+ rc = -ENODEV;
+ goto fail;
+ }
+ }
+
+ /* Register one EDAC memory controller per IMC; mc numbers them system wide. */
+ list_for_each_entry(d, skx_edac_list, list) {
+ rc = skx_get_src_id(d, &src_id);
+ if (rc < 0)
+ goto fail;
+ rc = skx_get_node_id(d, &node_id);
+ if (rc < 0)
+ goto fail;
+ edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+ for (i = 0; i < SKX_NUM_IMC; i++) {
+ d->imc[i].mc = mc++;
+ d->imc[i].lmc = i;
+ d->imc[i].src_id = src_id;
+ d->imc[i].node_id = node_id;
+ rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
+ "Skylake Socket", EDAC_MOD_STR,
+ skx_get_dimm_config);
+ if (rc < 0)
+ goto fail;
+ }
+ }
+
+ skx_set_decode(skx_decode);
+
+ /*
+ * Firmware translation is only requested when NVDIMMs were found.
+ * Only -ENODEV is reported here; any other return value is ignored.
+ */
+ if (nvdimm_count && skx_adxl_get() == -ENODEV)
+ skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
+
+ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+ opstate_init();
+
+ setup_skx_debug("skx_test");
+
+ mce_register_decode_chain(&skx_mce_dec);
+
+ return 0;
+fail:
+ skx_remove();
+ return rc;
+}
+
+/*
+ * Module exit: unregister from the MCE decode chain, remove the debugfs
+ * entries and release everything skx_init() set up.
+ */
+static void __exit skx_exit(void)
+{
+ edac_dbg(2, "\n");
+ mce_unregister_decode_chain(&skx_mce_dec);
+ teardown_skx_debug();
+ /* skx_init() only called skx_adxl_get() when NVDIMMs were found. */
+ if (nvdimm_count)
+ skx_adxl_put();
+ skx_remove();
+}
+
+module_init(skx_init);
+module_exit(skx_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
new file mode 100644
index 000000000000..0e96e7b5b0a7
--- /dev/null
+++ b/drivers/edac/skx_common.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common code for both the skx_edac driver and the Intel 10nm server EDAC driver.
+ * Originally split out from the skx_edac driver.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/adxl.h>
+#include <acpi/nfit.h>
+#include <asm/mce.h>
+#include "edac_module.h"
+#include "skx_common.h"
+
+static const char * const component_names[] = {
+ [INDEX_SOCKET] = "ProcessorSocketId",
+ [INDEX_MEMCTRL] = "MemoryControllerId",
+ [INDEX_CHANNEL] = "ChannelId",
+ [INDEX_DIMM] = "DimmSlotId",
+};
+
+static int component_indices[ARRAY_SIZE(component_names)];
+static int adxl_component_count;
+static const char * const *adxl_component_names;
+static u64 *adxl_values;
+static char *adxl_msg;
+
+static char skx_msg[MSG_SIZE];
+static skx_decode_f skx_decode;
+static u64 skx_tolm, skx_tohm;
+static LIST_HEAD(dev_edac_list);
+
+/*
+ * Set up firmware (ADXL) address translation: check that the firmware
+ * offers every component listed in component_names[], remember the
+ * position of each one in the firmware's list, and allocate the value and
+ * message buffers used by skx_adxl_decode().
+ *
+ * Returns 0 on success, -ENODEV when the firmware offers no translation or
+ * lacks a required component, and -ENOMEM when an allocation fails.
+ * adxl_component_count and adxl_component_names are only set on success,
+ * and no freed pointer is left behind on failure, so a later
+ * skx_adxl_put() remains safe.
+ */
+int __init skx_adxl_get(void)
+{
+	const char * const *names;
+	int i, j, count;
+
+	names = adxl_get_component_names();
+	if (!names) {
+		skx_printk(KERN_NOTICE, "No firmware support for address translation.\n");
+		return -ENODEV;
+	}
+
+	/* Every component this driver needs must be in the firmware's list. */
+	for (i = 0; i < INDEX_MAX; i++) {
+		for (j = 0; names[j]; j++) {
+			if (!strcmp(component_names[i], names[j])) {
+				component_indices[i] = j;
+				break;
+			}
+		}
+
+		if (!names[j])
+			goto err;
+	}
+
+	/* The firmware's list is NULL terminated. */
+	count = 0;
+	while (names[count])
+		count++;
+
+	adxl_values = kcalloc(count, sizeof(*adxl_values), GFP_KERNEL);
+	if (!adxl_values)
+		return -ENOMEM;
+
+	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+	if (!adxl_msg) {
+		/*
+		 * Clear the pointer after freeing it: callers may ignore
+		 * this error and still call skx_adxl_put() at exit, which
+		 * would otherwise free the same buffer a second time.
+		 */
+		kfree(adxl_values);
+		adxl_values = NULL;
+		return -ENOMEM;
+	}
+
+	/* Publish the list and its size only once both buffers exist. */
+	adxl_component_names = names;
+	adxl_component_count = count;
+
+	return 0;
+err:
+	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
+		   component_names[i]);
+	for (j = 0; names[j]; j++)
+		skx_printk(KERN_CONT, "%s ", names[j]);
+	skx_printk(KERN_CONT, "\n");
+
+	return -ENODEV;
+}
+
+/* Release the two buffers allocated by skx_adxl_get(). */
+void __exit skx_adxl_put(void)
+{
+ kfree(adxl_values); /* per-component values filled in by adxl_decode() */
+ kfree(adxl_msg); /* text built by skx_adxl_decode() */
+}
+
+/*
+ * Translate res->addr through the firmware ADXL method and fill in the
+ * socket, memory controller, channel and DIMM of @res. A readable list of
+ * every component the firmware reported is left in adxl_msg, which
+ * skx_mce_output_error() appends to its message.
+ *
+ * Returns false when the address is at or above skx_tohm, lies between
+ * skx_tolm and 4GiB, or the firmware fails to decode it.
+ */
+static bool skx_adxl_decode(struct decoded_addr *res)
+{
+	int i, len = 0;
+
+	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
+				      res->addr < BIT_ULL(32))) {
+		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
+		return false;
+	}
+
+	if (adxl_decode(res->addr, adxl_values)) {
+		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
+		return false;
+	}
+
+	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
+	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+
+	/*
+	 * Start from an empty string so that text from a previous event is
+	 * never reported again when no component below is printed.
+	 */
+	adxl_msg[0] = '\0';
+
+	for (i = 0; i < adxl_component_count; i++) {
+		/* All ones marks a component with no value for this address. */
+		if (adxl_values[i] == ~0x0ull)
+			continue;
+
+		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
+				adxl_component_names[i], adxl_values[i]);
+		/* snprintf() returns the untruncated length: stop when full. */
+		if (MSG_SIZE - len <= 0)
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Install the driver-specific decode callback that skx_mce_check_error()
+ * uses when firmware (ADXL) translation is not active.
+ */
+void skx_set_decode(skx_decode_f decode)
+{
+ skx_decode = decode;
+}
+
+/*
+ * Read the source id of a socket: bits 12-14 of config register 0xf0 on
+ * the socket's util_all device. Returns 0 and stores the id in @id, or
+ * -ENODEV when the register cannot be read.
+ */
+int skx_get_src_id(struct skx_dev *d, u8 *id)
+{
+	u32 val;
+
+	if (!pci_read_config_dword(d->util_all, 0xf0, &val)) {
+		*id = GET_BITFIELD(val, 12, 14);
+		return 0;
+	}
+
+	skx_printk(KERN_ERR, "Failed to read src id\n");
+	return -ENODEV;
+}
+
+/*
+ * Read the node id of a socket: bits 0-2 of config register 0xf4 on the
+ * socket's util_all device. Returns 0 and stores the id in @id, or
+ * -ENODEV when the register cannot be read.
+ */
+int skx_get_node_id(struct skx_dev *d, u8 *id)
+{
+	u32 val;
+
+	if (!pci_read_config_dword(d->util_all, 0xf4, &val)) {
+		*id = GET_BITFIELD(val, 0, 2);
+		return 0;
+	}
+
+	skx_printk(KERN_ERR, "Failed to read node id\n");
+	return -ENODEV;
+}
+
+/*
+ * Map bits 8-9 of @mtr to the EDAC device width constant; the encoding
+ * that has no width assigned yields DEV_UNKNOWN.
+ */
+static int get_width(u32 mtr)
+{
+	u32 code = GET_BITFIELD(mtr, 8, 9);
+
+	if (code == 0)
+		return DEV_X4;
+	if (code == 1)
+		return DEV_X8;
+	if (code == 2)
+		return DEV_X16;
+
+	return DEV_UNKNOWN;
+}
+
+/*
+ * We use the per-socket device @did to count how many sockets are present,
+ * and to determine which PCI buses are associated with each socket. Allocate
+ * and build the full list of all the skx_dev structures that we need here.
+ */
+int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
+			     struct list_head **list)
+{
+	struct pci_dev *pdev = NULL;
+	struct skx_dev *d;
+	int count = 0;
+	u32 reg;
+
+	/* One skx_dev is allocated per matching device, i.e. per socket. */
+	while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, pdev)) != NULL) {
+		count++;
+
+		d = kzalloc(sizeof(*d), GFP_KERNEL);
+		if (!d) {
+			pci_dev_put(pdev);
+			return -ENOMEM;
+		}
+
+		if (pci_read_config_dword(pdev, off, &reg)) {
+			kfree(d);
+			pci_dev_put(pdev);
+			skx_printk(KERN_ERR, "Failed to read bus idx\n");
+			return -ENODEV;
+		}
+
+		/* The register packs one bus number per byte. */
+		d->bus[0] = GET_BITFIELD(reg, 0, 7);
+		d->bus[1] = GET_BITFIELD(reg, 8, 15);
+		if (type != SKX) {
+			/* Only two buses here; the third byte is the segment. */
+			d->seg = GET_BITFIELD(reg, 16, 23);
+		} else {
+			d->seg = pci_domain_nr(pdev->bus);
+			d->bus[2] = GET_BITFIELD(reg, 16, 23);
+			d->bus[3] = GET_BITFIELD(reg, 24, 31);
+		}
+
+		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
+			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+		list_add_tail(&d->list, &dev_edac_list);
+	}
+
+	if (list)
+		*list = &dev_edac_list;
+
+	return count;
+}
+
+/*
+ * Read the tolm value and the two halves of the tohm value from the first
+ * Intel device with id @did. off[0] is the config offset of tolm, off[1]
+ * and off[2] those of the lower and upper 32 bits of tohm. The values are
+ * kept in this file's skx_tolm/skx_tohm and copied to @tolm/@tohm.
+ * Returns 0 on success, -ENODEV when the device or a register is missing.
+ */
+int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
+{
+	struct pci_dev *pdev;
+	int rc = -ENODEV;
+	u32 val;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
+	if (!pdev) {
+		skx_printk(KERN_ERR, "Can't get tolm/tohm\n");
+		return rc;
+	}
+
+	if (pci_read_config_dword(pdev, off[0], &val)) {
+		skx_printk(KERN_ERR, "Failed to read tolm\n");
+		goto out;
+	}
+	skx_tolm = val;
+
+	if (pci_read_config_dword(pdev, off[1], &val)) {
+		skx_printk(KERN_ERR, "Failed to read lower tohm\n");
+		goto out;
+	}
+	skx_tohm = val;
+
+	if (pci_read_config_dword(pdev, off[2], &val)) {
+		skx_printk(KERN_ERR, "Failed to read upper tohm\n");
+		goto out;
+	}
+	skx_tohm |= (u64)val << 32;
+
+	*tolm = skx_tolm;
+	*tohm = skx_tohm;
+	edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm);
+	rc = 0;
+out:
+	pci_dev_put(pdev);
+	return rc;
+}
+
+/*
+ * Extract bits @lobit..@hibit of @reg and check them against
+ * [@minval, @maxval]. Returns the field plus @add when it is in range,
+ * -EINVAL (after a debug message naming the attribute) otherwise.
+ */
+static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
+			     int minval, int maxval, const char *name)
+{
+	u32 val = GET_BITFIELD(reg, lobit, hibit);
+
+	if (val >= minval && val <= maxval)
+		return val + add;
+
+	edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
+	return -EINVAL;
+}
+
+#define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
+#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
+#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
+
+/*
+ * Decode the geometry of a DDR4 DIMM from its register value @mtr and
+ * record it both in the driver's per-DIMM state (used by the address
+ * decoder) and in the EDAC @dimm entry.
+ *
+ * Returns 1 when the DIMM was recorded. Returns 0 when @mtr holds a rank,
+ * row or column encoding outside the accepted range; @dimm and the
+ * per-DIMM state are then left untouched, so the slot is not counted as
+ * populated.
+ */
+int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+		      struct skx_imc *imc, int chan, int dimmno)
+{
+	int banks = 16, ranks, rows, cols, npages;
+	u64 size;
+
+	ranks = numrank(mtr);
+	rows = numrow(mtr);
+	cols = numcol(mtr);
+
+	/*
+	 * skx_get_dimm_attr() returns -EINVAL for an out-of-range field.
+	 * Using a negative value as a shift count below is undefined
+	 * behaviour and would produce a meaningless size, so give up on
+	 * this DIMM instead.
+	 */
+	if (ranks < 0 || rows < 0 || cols < 0) {
+		skx_printk(KERN_ERR, "mc#%d: channel %d, dimm %d: invalid geometry (mtr=0x%x)\n",
+			   imc->mc, chan, dimmno, mtr);
+		return 0;
+	}
+
+	/*
+	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
+	 */
+	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
+	npages = MiB_TO_PAGES(size);
+
+	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
+		 imc->mc, chan, dimmno, size, npages,
+		 banks, 1 << ranks, rows, cols);
+
+	/*
+	 * NOTE(review): close_pg and bank_xor_enable are taken from bits 0
+	 * and 9 of mtr, which overlap the column (bits 0-1) and width
+	 * (bits 8-9) fields decoded from the same value - confirm that mtr
+	 * is the intended source register for these two flags.
+	 */
+	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
+	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
+	imc->chan[chan].dimms[dimmno].rowbits = rows;
+	imc->chan[chan].dimms[dimmno].colbits = cols;
+
+	dimm->nr_pages = npages;
+	dimm->grain = 32;
+	dimm->dtype = get_width(mtr);
+	dimm->mtype = MEM_DDR4;
+	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+		 imc->src_id, imc->lmc, chan, dimmno);
+
+	return 1;
+}
+
+/*
+ * Describe an NVDIMM in the EDAC @dimm entry. The size is looked up by
+ * turning the slot position into an NFIT device handle, mapping that to an
+ * SMBIOS handle and asking DMI for the size of that memory device.
+ *
+ * Returns 1 when a size was found, 0 when it is unknown. In the latter
+ * case the entry is still labelled and typed, with nr_pages set to 0.
+ */
+int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
+			int chan, int dimmno, const char *mod_str)
+{
+	int smbios_handle;
+	u32 dev_handle;
+	u16 flags;
+	u64 size = 0;
+
+	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
+						   imc->src_id, 0);
+
+	smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
+	if (smbios_handle == -EOPNOTSUPP) {
+		pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str);
+		goto unknown_size;
+	}
+
+	if (smbios_handle < 0) {
+		skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
+		goto unknown_size;
+	}
+
+	if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
+		skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
+		goto unknown_size;
+	}
+
+	size = dmi_memdev_size(smbios_handle);
+	if (size == ~0ull) {
+		skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
+			   dev_handle, smbios_handle);
+		/*
+		 * All ones means "no size available". Handle it like the
+		 * other unknown-size cases rather than shifting the marker
+		 * value into nr_pages.
+		 */
+		size = 0;
+	}
+
+unknown_size:
+	dimm->nr_pages = size >> PAGE_SHIFT;
+	dimm->grain = 32;
+	dimm->dtype = DEV_UNKNOWN;
+	dimm->mtype = MEM_NVDIMM;
+	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+
+	edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
+		 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
+
+	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+		 imc->src_id, imc->lmc, chan, dimmno);
+
+	return size ? 1 : 0;
+}
+
+/*
+ * Allocate an EDAC memory controller for @imc, describe its DIMMs through
+ * the @get_dimm_config callback and register it with the EDAC core.
+ * @pdev supplies the device name and the generic device pointer, @ctl_name
+ * the prefix of the controller name and @mod_str the module name.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, the callback's
+ * error code, or -EINVAL when edac_mc_add_mc() fails. On failure the mci
+ * is freed and imc->mci is reset to NULL.
+ */
+int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
+ const char *ctl_name, const char *mod_str,
+ get_dimm_config_f get_dimm_config)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
+ struct skx_pvt *pvt;
+ int rc;
+
+ /* Allocate a new MC control structure */
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANNELS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = NUM_DIMMS;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
+ sizeof(struct skx_pvt));
+
+ if (unlikely(!mci))
+ return -ENOMEM;
+
+ edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
+
+ /* Associate skx_dev and mci for future usage */
+ imc->mci = mci;
+ pvt = mci->pvt_info;
+ pvt->imc = imc;
+
+ /* Heap-allocated name; freed on the failure path below and in skx_unregister_mci(). */
+ mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
+ imc->node_id, imc->lmc);
+ if (!mci->ctl_name) {
+ rc = -ENOMEM;
+ goto fail0;
+ }
+
+ mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE;
+ mci->edac_cap = EDAC_FLAG_NONE;
+ mci->mod_name = mod_str;
+ mci->dev_name = pci_name(pdev);
+ mci->ctl_page_to_phys = NULL;
+
+ /* Driver-specific callback that fills in the dimm_info entries of mci. */
+ rc = get_dimm_config(mci);
+ if (rc < 0)
+ goto fail;
+
+ /* Record ptr to the generic device */
+ mci->pdev = &pdev->dev;
+
+ /* Add this new MC control structure to EDAC's list of MCs */
+ if (unlikely(edac_mc_add_mc(mci))) {
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+ rc = -EINVAL;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ kfree(mci->ctl_name);
+fail0:
+ edac_mc_free(mci);
+ imc->mci = NULL;
+ return rc;
+}
+
+/*
+ * Remove the EDAC memory controller of @imc from the EDAC core and free
+ * it together with its controller name. Does nothing when @imc has no
+ * controller registered.
+ */
+static void skx_unregister_mci(struct skx_imc *imc)
+{
+	struct mem_ctl_info *mci = imc->mci;
+
+	if (mci) {
+		edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
+
+		/* Remove MC sysfs nodes */
+		edac_mc_del_mc(mci->pdev);
+
+		edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+		kfree(mci->ctl_name);
+		edac_mc_free(mci);
+	}
+}
+
+/*
+ * Find the EDAC memory controller for socket @src_id and socket-relative
+ * memory controller number @lmc.
+ *
+ * Returns NULL (after logging) when @lmc is outside [0, NUM_IMC) or no
+ * socket on dev_edac_list has that source id. May also return NULL when
+ * the socket exists but that controller has no mci registered.
+ */
+static struct mem_ctl_info *get_mci(int src_id, int lmc)
+{
+	struct skx_dev *d;
+
+	/*
+	 * Both values come from firmware-decoded data cast to int, so an
+	 * "invalid" all-ones value arrives here as -1: reject negative
+	 * indices as well as too large ones before indexing d->imc[].
+	 */
+	if (lmc < 0 || lmc > NUM_IMC - 1) {
+		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
+		return NULL;
+	}
+
+	list_for_each_entry(d, &dev_edac_list, list) {
+		if (d->imc[0].src_id == src_id)
+			return d->imc[lmc].mci;
+	}
+
+	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
+	return NULL;
+}
+
+/*
+ * Turn a decoded machine check into an EDAC error report: classify it as
+ * corrected, uncorrected or fatal from the status bits of @m, pick a text
+ * for the operation type, build the detail string in skx_msg and hand
+ * everything to edac_mc_handle_error() for controller @mci.
+ * @res carries the location found by the address decoder.
+ */
+static void skx_mce_output_error(struct mem_ctl_info *mci,
+ const struct mce *m,
+ struct decoded_addr *res)
+{
+ enum hw_event_mc_err_type tp_event;
+ char *type, *optype;
+ bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+ bool overflow = GET_BITFIELD(m->status, 62, 62);
+ bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+ bool recoverable;
+ u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+ u32 mscod = GET_BITFIELD(m->status, 16, 31);
+ u32 errcode = GET_BITFIELD(m->status, 0, 15);
+ u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+
+ recoverable = GET_BITFIELD(m->status, 56, 56);
+
+ /* NOTE(review): 'type' is assigned below but never read in this function. */
+ if (uncorrected_error) {
+ core_err_cnt = 1; /* an uncorrected error is always reported as one event */
+ if (ripv) {
+ type = "FATAL";
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else {
+ type = "NON_FATAL";
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ }
+ } else {
+ type = "CORRECTED";
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
+
+ /*
+ * According to Intel Architecture spec vol 3B,
+ * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
+ * memory errors should fit one of these masks:
+ * 000f 0000 1mmm cccc (binary)
+ * 000f 0010 1mmm cccc (binary) [RAM used as cache]
+ * where:
+ * f = Correction Report Filtering Bit. If 1, subsequent errors
+ * won't be shown
+ * mmm = error type
+ * cccc = channel
+ * If the mask doesn't match, report an error to the parsing logic
+ */
+ if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
+ optype = "Can't parse: it is not a mem";
+ } else {
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ break;
+ default:
+ optype = "reserved";
+ break;
+ }
+ }
+ /* With firmware decoding active, the location text was prepared in adxl_msg. */
+ if (adxl_component_count) {
+ snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ mscod, errcode, adxl_msg);
+ } else {
+ snprintf(skx_msg, MSG_SIZE,
+ "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ mscod, errcode,
+ res->socket, res->imc, res->rank,
+ res->bank_group, res->bank_address, res->row, res->column);
+ }
+
+ edac_dbg(0, "%s\n", skx_msg);
+
+ /* Call the helper to output message */
+ edac_mc_handle_error(tp_event, mci, core_err_cnt,
+ m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+ res->channel, res->dimm, -1,
+ optype, skx_msg);
+}
+
+/*
+ * Notifier callback for machine check events (@data points to the
+ * struct mce). Memory errors that carry a valid address are decoded,
+ * either by firmware (ADXL) when it was set up or by the callback
+ * installed with skx_set_decode(), then logged and reported through
+ * skx_mce_output_error(). Every path returns NOTIFY_DONE.
+ */
+int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ struct decoded_addr res;
+ struct mem_ctl_info *mci;
+ char *type;
+
+ if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
+ return NOTIFY_DONE;
+
+ /* ignore unless this is memory related with an address */
+ if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+ return NOTIFY_DONE;
+
+ memset(&res, 0, sizeof(res));
+ res.addr = mce->addr;
+
+ if (adxl_component_count) {
+ if (!skx_adxl_decode(&res))
+ return NOTIFY_DONE;
+
+ /* Firmware decoding does not set res.dev, so look the mci up by ids. */
+ mci = get_mci(res.socket, res.imc);
+ } else {
+ if (!skx_decode || !skx_decode(&res))
+ return NOTIFY_DONE;
+
+ mci = res.dev->imc[res.imc].mci;
+ }
+
+ if (!mci)
+ return NOTIFY_DONE;
+
+ if (mce->mcgstatus & MCG_STATUS_MCIP)
+ type = "Exception";
+ else
+ type = "Event";
+
+ skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+ skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
+ "Bank %d: 0x%llx\n", mce->extcpu, type,
+ mce->mcgstatus, mce->bank, mce->status);
+ skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
+ skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
+ skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);
+
+ skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
+ "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
+ mce->time, mce->socketid, mce->apicid);
+
+ skx_mce_output_error(mci, mce, &res);
+
+ return NOTIFY_DONE;
+}
+
+/*
+ * Tear down everything hanging off dev_edac_list: unregister each EDAC
+ * memory controller, unmap and release the devices recorded for every
+ * memory controller and channel, release the per-socket devices and free
+ * each list entry.
+ */
+void skx_remove(void)
+{
+	struct skx_dev *d, *next;
+	struct skx_imc *imc;
+	int mc, ch;
+
+	edac_dbg(0, "\n");
+
+	list_for_each_entry_safe(d, next, &dev_edac_list, list) {
+		list_del(&d->list);
+
+		for (mc = 0; mc < NUM_IMC; mc++) {
+			imc = &d->imc[mc];
+
+			if (imc->mci)
+				skx_unregister_mci(imc);
+
+			if (imc->mdev)
+				pci_dev_put(imc->mdev);
+
+			if (imc->mbase)
+				iounmap(imc->mbase);
+
+			for (ch = 0; ch < NUM_CHANNELS; ch++) {
+				if (imc->chan[ch].cdev)
+					pci_dev_put(imc->chan[ch].cdev);
+			}
+		}
+
+		if (d->util_all)
+			pci_dev_put(d->util_all);
+		if (d->sad_all)
+			pci_dev_put(d->sad_all);
+		if (d->uracu)
+			pci_dev_put(d->uracu);
+
+		kfree(d);
+	}
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature.
+ * Exercise the address decode logic by writing an address to
+ * /sys/kernel/debug/edac/dirname/addr.
+ */
+static struct dentry *skx_test;
+
+/*
+ * debugfs write handler: build a fake machine check record for address
+ * @val and run it through skx_mce_check_error(). Always returns 0.
+ */
+static int debugfs_u64_set(void *data, u64 val)
+{
+	struct mce fake;
+
+	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
+
+	memset(&fake, 0, sizeof(fake));
+	fake.addr = val;
+	/* ADDRV + MemRd + Unknown channel, with a count of one corrected error */
+	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);
+
+	skx_mce_check_error(NULL, 0, &fake);
+
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+/*
+ * Create the EDAC debugfs directory @dirname with a write-only "addr"
+ * file in it. If the file cannot be created the directory is removed
+ * again and skx_test is reset to NULL.
+ */
+void setup_skx_debug(const char *dirname)
+{
+	skx_test = edac_debugfs_create_dir(dirname);
+	if (!skx_test)
+		return;
+
+	if (edac_debugfs_create_file("addr", 0200, skx_test,
+				     NULL, &fops_u64_wo))
+		return;
+
+	debugfs_remove(skx_test);
+	skx_test = NULL;
+}
+
+/* Remove the debugfs directory created by setup_skx_debug() and its contents. */
+void teardown_skx_debug(void)
+{
+ debugfs_remove_recursive(skx_test);
+}
+#endif /*CONFIG_EDAC_DEBUG*/
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
new file mode 100644
index 000000000000..d25374e34d4f
--- /dev/null
+++ b/drivers/edac/skx_common.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common code for both the skx_edac driver and Intel 10nm server EDAC driver.
+ * Originally split out from the skx_edac driver.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#ifndef _SKX_COMM_EDAC_H
+#define _SKX_COMM_EDAC_H
+
+/* Size bound for message buffers -- presumably the formatted error text; confirm in skx_common.c */
+#define MSG_SIZE 1024
+
+/*
+ * Debug macros: thin wrappers around the EDAC print helpers that tag every
+ * message with "skx".
+ */
+#define skx_printk(level, fmt, arg...) \
+ edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...) \
+ edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ * (both inclusive). The field is shifted down so that bit <lo> of <v>
+ * ends up as bit 0 of the result.
+ */
+#define GET_BITFIELD(v, lo, hi) \
+ (((v) & GENMASK_ULL((hi), (lo))) >> (lo))
+
+#define SKX_NUM_IMC 2 /* Memory controllers per socket */
+#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
+#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
+
+/* The same three limits for the i10nm driver */
+#define I10NM_NUM_IMC 4
+#define I10NM_NUM_CHANNELS 2
+#define I10NM_NUM_DIMMS 2
+
+/*
+ * NUM_* are the larger of the SKX and I10NM limits; they size the arrays in
+ * struct skx_dev so that one layout fits both drivers.
+ * MAX() evaluates each argument more than once: use it on constants only.
+ */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
+#define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
+#define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
+
+/* Presence flags: bit 15 of register value <r>, and bit <i> of <r> */
+#define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15)
+#define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i)
+
+/*
+ * Each cpu socket contains some pci devices that provide global
+ * information, and also some that are local to each of the memory
+ * controllers on the die (at most NUM_IMC of them).
+ */
+struct skx_dev {
+ struct list_head list; /* link in the list walked by skx_remove() */
+ u8 bus[4];
+ int seg;
+ struct pci_dev *sad_all;
+ struct pci_dev *util_all;
+ struct pci_dev *uracu; /* for i10nm CPU */
+ u32 mcroute;
+ struct skx_imc {
+ struct mem_ctl_info *mci;
+ struct pci_dev *mdev; /* for i10nm CPU */
+ void __iomem *mbase; /* for i10nm CPU */
+ u8 mc; /* system wide mc# */
+ u8 lmc; /* socket relative mc# */
+ u8 src_id, node_id;
+ struct skx_channel {
+ struct pci_dev *cdev; /* per-channel PCI device */
+ /* Per-DIMM decode parameters */
+ struct skx_dimm {
+ u8 close_pg;
+ u8 bank_xor_enable;
+ u8 fine_grain_bank;
+ u8 rowbits;
+ u8 colbits;
+ } dimms[NUM_DIMMS];
+ } chan[NUM_CHANNELS];
+ } imc[NUM_IMC];
+};
+
+/* Driver-private data: a pointer to one memory controller's skx_imc. */
+struct skx_pvt {
+ struct skx_imc *imc;
+};
+
+/* Selects between the two supported drivers; taken by skx_get_all_bus_mappings(). */
+enum type {
+ SKX,
+ I10NM
+};
+
+/*
+ * Indices for the four location components (socket, memory controller,
+ * channel, DIMM). INDEX_MAX is the number of entries, not a component.
+ */
+enum {
+ INDEX_SOCKET,
+ INDEX_MEMCTRL,
+ INDEX_CHANNEL,
+ INDEX_DIMM,
+ INDEX_MAX
+};
+
+/*
+ * Working record for decoding one error address. The caller fills in
+ * @addr; the remaining fields are results.
+ */
+struct decoded_addr {
+ struct skx_dev *dev;
+ u64 addr; /* address to decode (input) */
+ int socket;
+ int imc;
+ int channel;
+ u64 chan_addr;
+ int sktways;
+ int chanways;
+ int dimm;
+ int rank;
+ int channel_rank;
+ u64 rank_address;
+ int row;
+ int column;
+ int bank_address;
+ int bank_group;
+};
+
+/* Callback handed to skx_register_mci(); receives the mci being set up. */
+typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
+/* Address decoder callback installed through skx_set_decode(). */
+typedef bool (*skx_decode_f)(struct decoded_addr *res);
+
+int __init skx_adxl_get(void);
+void __exit skx_adxl_put(void);
+void skx_set_decode(skx_decode_f decode);
+
+/* Both store their result through @id and return an int status. */
+int skx_get_src_id(struct skx_dev *d, u8 *id);
+int skx_get_node_id(struct skx_dev *d, u8 *id);
+
+int skx_get_all_bus_mappings(unsigned int did, int off, enum type,
+ struct list_head **list);
+
+int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
+
+int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+ struct skx_imc *imc, int chan, int dimmno);
+
+int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
+ int chan, int dimmno, const char *mod_str);
+
+int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
+ const char *ctl_name, const char *mod_str,
+ get_dimm_config_f get_dimm_config);
+
+/* Notifier-style callback; @data points to the struct mce to examine. */
+int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data);
+
+/* Releases every skx_dev and the resources it holds. */
+void skx_remove(void);
+
+/* debugfs error-injection hooks; empty stubs without CONFIG_EDAC_DEBUG. */
+#ifdef CONFIG_EDAC_DEBUG
+void setup_skx_debug(const char *dirname);
+void teardown_skx_debug(void);
+#else
+static inline void setup_skx_debug(const char *dirname) {}
+static inline void teardown_skx_debug(void) {}
+#endif /*CONFIG_EDAC_DEBUG*/
+
+#endif /* _SKX_COMM_EDAC_H */
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
deleted file mode 100644
index 93ef161bb5e1..000000000000
--- a/drivers/edac/skx_edac.c
+++ /dev/null
@@ -1,1358 +0,0 @@
-/*
- * EDAC driver for Intel(R) Xeon(R) Skylake processors
- * Copyright (c) 2016, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-#include <linux/dmi.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/edac.h>
-#include <linux/mmzone.h>
-#include <linux/smp.h>
-#include <linux/bitmap.h>
-#include <linux/math64.h>
-#include <linux/mod_devicetable.h>
-#include <linux/adxl.h>
-#include <acpi/nfit.h>
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-#include <asm/processor.h>
-#include <asm/mce.h>
-
-#include "edac_module.h"
-
-/* Module name: stored in mci->mod_name and used as a log prefix */
-#define EDAC_MOD_STR "skx_edac"
-/* Size bound passed to snprintf() when formatting skx_msg and adxl_msg */
-#define MSG_SIZE 1024
-
-/*
- * Debug macros: thin wrappers around the EDAC print helpers that tag every
- * message with "skx".
- */
-#define skx_printk(level, fmt, arg...) \
- edac_printk(level, "skx", fmt, ##arg)
-
-#define skx_mc_printk(mci, level, fmt, arg...) \
- edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
-
-/*
- * Get a bit field at register value <v>, from bit <lo> to bit <hi>
- * (both inclusive). The field is shifted down so that bit <lo> of <v>
- * ends up as bit 0 of the result.
- */
-#define GET_BITFIELD(v, lo, hi) \
- (((v) & GENMASK_ULL((hi), (lo))) >> (lo))
-
-/* Every skx_dev allocated by get_all_bus_mappings() */
-static LIST_HEAD(skx_edac_list);
-
-/* Limits read by skx_get_hi_lo(); used to reject out-of-range addresses */
-static u64 skx_tolm, skx_tohm;
-/* Buffer skx_mce_output_error() formats its message into */
-static char *skx_msg;
-/* Incremented by get_nvdimm_info() for every NVDIMM it is called for */
-static unsigned int nvdimm_count;
-
-/* Indices into component_names[] / component_indices[]; INDEX_MAX is the count */
-enum {
- INDEX_SOCKET,
- INDEX_MEMCTRL,
- INDEX_CHANNEL,
- INDEX_DIMM,
- INDEX_MAX
-};
-
-/* Component name strings, one per INDEX_* value */
-static const char * const component_names[] = {
- [INDEX_SOCKET] = "ProcessorSocketId",
- [INDEX_MEMCTRL] = "MemoryControllerId",
- [INDEX_CHANNEL] = "ChannelId",
- [INDEX_DIMM] = "DimmSlotId",
-};
-
-/* component_indices[INDEX_x] is the slot of that component in adxl_values[] */
-static int component_indices[ARRAY_SIZE(component_names)];
-/* Non-zero selects the ADXL decode path in skx_mce_check_error() */
-static int adxl_component_count;
-static const char * const *adxl_component_names;
-/* Filled by adxl_decode() */
-static u64 *adxl_values;
-/* Text built by skx_adxl_decode() from the decoded components */
-static char *adxl_msg;
-
-#define NUM_IMC 2 /* memory controllers per socket */
-#define NUM_CHANNELS 3 /* channels per memory controller */
-#define NUM_DIMMS 2 /* Max DIMMS per channel */
-
-#define MASK26 0x3FFFFFF /* Low 26 bits set (2^26 - 1) */
-#define MASK29 0x1FFFFFFF /* Low 29 bits set (2^29 - 1) */
-
-/*
- * Each cpu socket contains some pci devices that provide global
- * information, and also some that are local to each of the two
- * memory controllers on the die.
- */
-struct skx_dev {
- struct list_head list; /* link in skx_edac_list */
- u8 bus[4]; /* bus numbers read from register 0xCC of device 0x2016 */
- int seg; /* PCI domain number */
- struct pci_dev *sad_all;
- struct pci_dev *util_all;
- u32 mcroute; /* route table read from the SAD devices */
- struct skx_imc {
- struct mem_ctl_info *mci;
- u8 mc; /* system wide mc# */
- u8 lmc; /* socket relative mc# */
- u8 src_id, node_id;
- struct skx_channel {
- struct pci_dev *cdev; /* per-channel PCI device */
- /* Per-DIMM decode parameters, set by get_dimm_info() */
- struct skx_dimm {
- u8 close_pg;
- u8 bank_xor_enable;
- u8 fine_grain_bank;
- u8 rowbits;
- u8 colbits;
- } dimms[NUM_DIMMS];
- } chan[NUM_CHANNELS];
- } imc[NUM_IMC];
-};
-/* Incremented once per skx_dev created in get_all_bus_mappings() */
-static int skx_num_sockets;
-
-/* Stored in mci->pvt_info; points back at the memory controller's skx_imc. */
-struct skx_pvt {
- struct skx_imc *imc;
-};
-
-/*
- * Working record for decoding one error address. The caller fills in
- * @addr; each decode stage adds its own results.
- */
-struct decoded_addr {
- struct skx_dev *dev; /* set by skx_sad_decode() */
- u64 addr; /* address to decode (input) */
- int socket;
- int imc;
- int channel;
- u64 chan_addr; /* set by skx_tad_decode() */
- int sktways;
- int chanways;
- int dimm; /* set by skx_rir_decode() or skx_adxl_decode() */
- int rank;
- int channel_rank;
- u64 rank_address;
- int row; /* row/column/bank fields are set by skx_mad_decode() */
- int column;
- int bank_address;
- int bank_group;
-};
-
-/*
- * Find the skx_dev whose PCI domain matches that of @bus and whose
- * bus[@idx] entry equals the number of @bus. Returns NULL if none matches.
- */
-static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
-{
-	struct skx_dev *dev;
-
-	list_for_each_entry(dev, &skx_edac_list, list) {
-		if (dev->seg != pci_domain_nr(bus))
-			continue;
-		if (dev->bus[idx] == bus->number)
-			return dev;
-	}
-
-	return NULL;
-}
-
-/*
- * Kind of PCI device described by a struct munit. CHAN0..CHAN2 double as
- * the channel index in get_all_munits().
- */
-enum munittype {
- CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
-};
-
-/* One PCI device type that get_all_munits() searches for. */
-struct munit {
- u16 did; /* PCI device id */
- u16 devfn[NUM_IMC]; /* expected devfn for each memory controller */
- u8 busidx; /* index into skx_dev.bus[] */
- u8 per_socket; /* devfn[] is matched only when this equals NUM_IMC */
- enum munittype mtype;
-};
-
-/*
- * List of PCI device ids that we need together with some device
- * number and function numbers to tell which memory controller the
- * device belongs to.
- *
- * Columns: did, devfn[], busidx, per_socket, mtype. The all-zero entry
- * ends the table.
- */
-static const struct munit skx_all_munits[] = {
- { 0x2054, { }, 1, 1, SAD_ALL },
- { 0x2055, { }, 1, 1, UTIL_ALL },
- { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
- { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
- { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
- { 0x208e, { }, 1, 0, SAD },
- { }
-};
-
-/*
- * We use the per-socket device 0x2016 to count how many sockets are present,
- * and to determine which PCI buses are associated with each socket. Allocate
- * and build the full list of all the skx_dev structures that we need here.
- *
- * Returns the number of 0x2016 devices found, or -ENOMEM if an allocation
- * fails (entries already added to skx_edac_list are left in place).
- */
-static int get_all_bus_mappings(void)
-{
- struct pci_dev *pdev, *prev;
- struct skx_dev *d;
- u32 reg;
- int ndev = 0;
-
- prev = NULL;
- for (;;) {
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
- if (!pdev)
- break;
- ndev++;
- d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d) {
- pci_dev_put(pdev);
- return -ENOMEM;
- }
- d->seg = pci_domain_nr(pdev->bus);
- /* Register 0xCC packs four bus numbers, one per byte */
- pci_read_config_dword(pdev, 0xCC, &reg);
- d->bus[0] = GET_BITFIELD(reg, 0, 7);
- d->bus[1] = GET_BITFIELD(reg, 8, 15);
- d->bus[2] = GET_BITFIELD(reg, 16, 23);
- d->bus[3] = GET_BITFIELD(reg, 24, 31);
- edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
- d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
- list_add_tail(&d->list, &skx_edac_list);
- skx_num_sockets++;
- /* Continue the search after this device */
- prev = pdev;
- }
-
- return ndev;
-}
-
-/*
- * Find every PCI device with id m->did, enable it and record it in the
- * skx_dev that owns its bus, according to m->mtype.
- *
- * Returns the number of devices recorded (SAD devices are not counted), or
- * -ENODEV if a device has an unexpected devfn, belongs to no known skx_dev,
- * cannot be enabled, or reports a mismatching route table.
- */
-static int get_all_munits(const struct munit *m)
-{
- struct pci_dev *pdev, *prev;
- struct skx_dev *d;
- u32 reg;
- int i = 0, ndev = 0;
-
- prev = NULL;
- for (;;) {
- pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
- if (!pdev)
- break;
- ndev++;
- /* Work out which memory controller (i) this device belongs to */
- if (m->per_socket == NUM_IMC) {
- for (i = 0; i < NUM_IMC; i++)
- if (m->devfn[i] == pdev->devfn)
- break;
- if (i == NUM_IMC)
- goto fail;
- }
- d = get_skx_dev(pdev->bus, m->busidx);
- if (!d)
- goto fail;
-
- /* Be sure that the device is enabled */
- if (unlikely(pci_enable_device(pdev) < 0)) {
- skx_printk(KERN_ERR, "Couldn't enable device %04x:%04x\n",
- PCI_VENDOR_ID_INTEL, m->did);
- goto fail;
- }
-
- switch (m->mtype) {
- /* Stored pointers take their own reference via pci_dev_get() */
- case CHAN0: case CHAN1: case CHAN2:
- pci_dev_get(pdev);
- d->imc[i].chan[m->mtype].cdev = pdev;
- break;
- case SAD_ALL:
- pci_dev_get(pdev);
- d->sad_all = pdev;
- break;
- case UTIL_ALL:
- pci_dev_get(pdev);
- d->util_all = pdev;
- break;
- case SAD:
- /*
- * one of these devices per core, including cores
- * that don't exist on this SKU. Ignore any that
- * read a route table of zero, make sure all the
- * non-zero values match.
- */
- pci_read_config_dword(pdev, 0xB4, &reg);
- if (reg != 0) {
- if (d->mcroute == 0)
- d->mcroute = reg;
- else if (d->mcroute != reg) {
- skx_printk(KERN_ERR,
- "mcroute mismatch\n");
- goto fail;
- }
- }
- /* SAD devices are not kept, so undo the count above */
- ndev--;
- break;
- }
-
- prev = pdev;
- }
-
- return ndev;
-fail:
- /* Drop the reference on the device the search stopped at */
- pci_dev_put(pdev);
- return -ENODEV;
-}
-
-/* CPU match table: Intel family 6, model INTEL_FAM6_SKYLAKE_X. */
-static const struct x86_cpu_id skx_cpuids[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X, 0, 0 },
- { }
-};
-MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
-
-/* Source id of socket @d: bits 12-14 of config register 0xF0 of d->util_all. */
-static u8 get_src_id(struct skx_dev *d)
-{
-	u32 val;
-
-	pci_read_config_dword(d->util_all, 0xF0, &val);
-	val = GET_BITFIELD(val, 12, 14);
-
-	return val;
-}
-
-/* Node id of socket @d: bits 0-2 of config register 0xF4 of d->util_all. */
-static u8 skx_get_node_id(struct skx_dev *d)
-{
-	u32 val;
-
-	pci_read_config_dword(d->util_all, 0xF4, &val);
-	val = GET_BITFIELD(val, 0, 2);
-
-	return val;
-}
-
-/*
- * Extract bits @lobit..@hibit of @reg and range-check them against
- * [@minval, @maxval]. Returns the field plus @add when in range; otherwise
- * logs the bad value under @name and returns -EINVAL.
- */
-static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
-			 int maxval, char *name)
-{
-	u32 field = GET_BITFIELD(reg, lobit, hibit);
-
-	if (field >= minval && field <= maxval)
-		return field + add;
-
-	edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, field, reg);
-	return -EINVAL;
-}
-
-/* Presence flags: bit 15 of <mtr>, and bit <i> of <mcddrtcfg> */
-#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15)
-#define IS_NVDIMM_PRESENT(mcddrtcfg, i) GET_BITFIELD((mcddrtcfg), (i), (i))
-
-/*
- * DIMM geometry fields of the MTR register, range-checked by
- * get_dimm_attr(); each yields -EINVAL for an out-of-range field.
- * Rows and columns are returned as bit counts (field + 12 and field + 10).
- */
-#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
-#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
-#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
-
-/*
- * Map bits 8-9 of @mtr to a device width: 0 is DEV_X4, 1 is DEV_X8,
- * 2 is DEV_X16, and anything else is DEV_UNKNOWN.
- */
-static int get_width(u32 mtr)
-{
-	int width;
-
-	switch (GET_BITFIELD(mtr, 8, 9)) {
-	case 0:
-		width = DEV_X4;
-		break;
-	case 1:
-		width = DEV_X8;
-		break;
-	case 2:
-		width = DEV_X16;
-		break;
-	default:
-		width = DEV_UNKNOWN;
-		break;
-	}
-
-	return width;
-}
-
-/*
- * Read skx_tolm and skx_tohm from the first PCI device with id 0x2034.
- * Returns 0 on success or -ENODEV if no such device exists.
- */
-static int skx_get_hi_lo(void)
-{
-	struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
-	u32 val;
-
-	if (dev == NULL) {
-		edac_dbg(0, "Can't get tolm/tohm\n");
-		return -ENODEV;
-	}
-
-	/* 0xD0 is tolm; 0xD4 and 0xD8 are the low and high halves of tohm */
-	pci_read_config_dword(dev, 0xD0, &val);
-	skx_tolm = val;
-	pci_read_config_dword(dev, 0xD4, &val);
-	skx_tohm = val;
-	pci_read_config_dword(dev, 0xD8, &val);
-	skx_tohm |= (u64)val << 32;
-	pci_dev_put(dev);
-
-	edac_dbg(2, "tolm=0x%llx tohm=0x%llx\n", skx_tolm, skx_tohm);
-
-	return 0;
-}
-
-/*
- * Describe one DDR4 DIMM.
- *
- * @mtr:    MTR register value for this DIMM (geometry, width, page policy)
- * @amap:   AMAP register value of the channel (bit 0: fine grain bank)
- * @dimm:   EDAC dimm_info to fill in
- * @imc:    memory controller that owns the DIMM
- * @chan:   channel index within @imc
- * @dimmno: DIMM slot index within the channel
- *
- * Stores the decode parameters in imc->chan[chan].dimms[dimmno] and the
- * size, type and label in @dimm.
- *
- * Returns 1 when the DIMM was recorded, or 0 when the MTR geometry fields
- * are out of range (get_dimm_attr() has already logged which one). The
- * result is added straight into the caller's DIMM count.
- */
-static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
-			 struct skx_imc *imc, int chan, int dimmno)
-{
-	int banks = 16, ranks, rows, cols, npages;
-	u64 size;
-
-	ranks = numrank(mtr);
-	rows = numrow(mtr);
-	cols = numcol(mtr);
-
-	/*
-	 * Each helper returns -EINVAL for a bad field. Using that as a shift
-	 * count below would be undefined behaviour, so skip the DIMM instead.
-	 */
-	if (ranks < 0 || rows < 0 || cols < 0)
-		return 0;
-
-	/*
-	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
-	 */
-	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
-	npages = MiB_TO_PAGES(size);
-
-	/* "0x%x", not "0x%#x": the '#' flag would print a second "0x" */
-	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
-		 imc->mc, chan, dimmno, size, npages,
-		 banks, 1 << ranks, rows, cols);
-
-	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
-	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
-	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
-	imc->chan[chan].dimms[dimmno].rowbits = rows;
-	imc->chan[chan].dimms[dimmno].colbits = cols;
-
-	dimm->nr_pages = npages;
-	dimm->grain = 32;
-	dimm->dtype = get_width(mtr);
-	dimm->mtype = MEM_DDR4;
-	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
-	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
-		 imc->src_id, imc->lmc, chan, dimmno);
-
-	return 1;
-}
-
-/*
- * Describe one NVDIMM. Its size is looked up through the NFIT device
- * handle and the matching SMBIOS handle; when any step fails the size
- * stays 0.
- *
- * Always fills in @dimm (type, grain, label, nr_pages) and increments
- * nvdimm_count. Returns 1 when a usable size was found, 0 otherwise.
- */
-static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
- int chan, int dimmno)
-{
- int smbios_handle;
- u32 dev_handle;
- u16 flags;
- u64 size = 0;
-
- nvdimm_count++;
-
- dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
- imc->src_id, 0);
-
- smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
- if (smbios_handle == -EOPNOTSUPP) {
- pr_warn_once(EDAC_MOD_STR ": Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n");
- goto unknown_size;
- }
-
- if (smbios_handle < 0) {
- skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
- goto unknown_size;
- }
-
- if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
- skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
- goto unknown_size;
- }
-
- size = dmi_memdev_size(smbios_handle);
- /* ~0ull is treated as "size not found"; it is logged and rejected below */
- if (size == ~0ull)
- skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
- dev_handle, smbios_handle);
-
-unknown_size:
- dimm->nr_pages = size >> PAGE_SHIFT;
- dimm->grain = 32;
- dimm->dtype = DEV_UNKNOWN;
- dimm->mtype = MEM_NVDIMM;
- dimm->edac_mode = EDAC_SECDED; /* likely better than this */
-
- edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
- imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
-
- snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
- imc->src_id, imc->lmc, chan, dimmno);
-
- return (size == 0 || size == ~0ull) ? 0 : 1;
-}
-
-/* Read config register 0x87c of <dev> into <reg> */
-#define SKX_GET_MTMTR(dev, reg) \
- pci_read_config_dword((dev), 0x87c, &reg)
-
-/* Returns bit 2 of the MTMTR register of @pdev; a clear bit is reported as "ECC is disabled". */
-static bool skx_check_ecc(struct pci_dev *pdev)
-{
- u32 mtmtr;
-
- SKX_GET_MTMTR(pdev, mtmtr);
-
- return !!GET_BITFIELD(mtmtr, 2, 2);
-}
-
-/*
- * Walk every channel and DIMM slot of the memory controller behind @mci
- * and fill in the dimm_info of each populated slot.
- *
- * Returns 0 on success, or -ENODEV if a channel has DIMMs while
- * skx_check_ecc() reports ECC as disabled.
- */
-static int skx_get_dimm_config(struct mem_ctl_info *mci)
-{
- struct skx_pvt *pvt = mci->pvt_info;
- struct skx_imc *imc = pvt->imc;
- u32 mtr, amap, mcddrtcfg;
- struct dimm_info *dimm;
- int i, j;
- int ndimms;
-
- for (i = 0; i < NUM_CHANNELS; i++) {
- ndimms = 0;
- pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
- pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
- for (j = 0; j < NUM_DIMMS; j++) {
- dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
- mci->n_layers, i, j, 0);
- /* One MTR register per DIMM slot, 4 bytes apart from 0x80 */
- pci_read_config_dword(imc->chan[i].cdev,
- 0x80 + 4*j, &mtr);
- if (IS_DIMM_PRESENT(mtr))
- ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
- else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
- ndimms += get_nvdimm_info(dimm, imc, i, j);
- }
- /* The ECC check always reads channel 0's device, whichever channel i is */
- if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
- skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-/*
- * Undo skx_register_mci() for @imc: remove the mci from the EDAC core, free
- * its ctl_name string and then the mci itself. Does nothing when @imc has
- * no mci.
- */
-static void skx_unregister_mci(struct skx_imc *imc)
-{
-	struct mem_ctl_info *ctl;
-
-	if (!imc->mci)
-		return;
-	ctl = imc->mci;
-
-	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, ctl);
-
-	/* Remove MC sysfs nodes */
-	edac_mc_del_mc(ctl->pdev);
-
-	edac_dbg(1, "%s: free mci struct\n", ctl->ctl_name);
-	kfree(ctl->ctl_name);
-	edac_mc_free(ctl);
-}
-
-/*
- * Allocate an mci for @imc with a channel x slot layout, read the DIMM
- * configuration and register the mci with the EDAC core.
- *
- * Returns 0 on success. On failure returns -ENOMEM, -EINVAL or the error
- * from skx_get_dimm_config(), frees the mci and resets imc->mci to NULL.
- */
-static int skx_register_mci(struct skx_imc *imc)
-{
- struct mem_ctl_info *mci;
- struct edac_mc_layer layers[2];
- struct pci_dev *pdev = imc->chan[0].cdev;
- struct skx_pvt *pvt;
- int rc;
-
- /* allocate a new MC control structure */
- layers[0].type = EDAC_MC_LAYER_CHANNEL;
- layers[0].size = NUM_CHANNELS;
- layers[0].is_virt_csrow = false;
- layers[1].type = EDAC_MC_LAYER_SLOT;
- layers[1].size = NUM_DIMMS;
- layers[1].is_virt_csrow = true;
- mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
- sizeof(struct skx_pvt));
-
- if (unlikely(!mci))
- return -ENOMEM;
-
- edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
-
- /* Associate skx_dev and mci for future usage */
- imc->mci = mci;
- pvt = mci->pvt_info;
- pvt->imc = imc;
-
- /* Allocated here; freed on the fail path and in skx_unregister_mci() */
- mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
- imc->node_id, imc->lmc);
- if (!mci->ctl_name) {
- rc = -ENOMEM;
- goto fail0;
- }
-
- mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
- mci->edac_ctl_cap = EDAC_FLAG_NONE;
- mci->edac_cap = EDAC_FLAG_NONE;
- mci->mod_name = EDAC_MOD_STR;
- mci->dev_name = pci_name(imc->chan[0].cdev);
- mci->ctl_page_to_phys = NULL;
-
- rc = skx_get_dimm_config(mci);
- if (rc < 0)
- goto fail;
-
- /* record ptr to the generic device */
- mci->pdev = &pdev->dev;
-
- /* add this new MC control structure to EDAC's list of MCs */
- if (unlikely(edac_mc_add_mc(mci))) {
- edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
- rc = -EINVAL;
- goto fail;
- }
-
- return 0;
-
-fail:
- kfree(mci->ctl_name);
-fail0:
- edac_mc_free(mci);
- imc->mci = NULL;
- return rc;
-}
-
-/* Number of SAD entries scanned by skx_sad_decode() */
-#define SKX_MAX_SAD 24
-
-/* Entry <i> is a register pair on d->sad_all: SAD at 0x60 + 8*i, ILV at 0x64 + 8*i */
-#define SKX_GET_SAD(d, i, reg) \
- pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
-#define SKX_GET_ILV(d, i, reg) \
- pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)
-
-/* Fields of a SAD register value */
-#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31)
-#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27)
-/* Limit field in units of 2^26 bytes; low 26 bits set so the limit is inclusive */
-#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
-#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
-#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4)
-#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
-#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0)
-
-/* A 4-bit interleave target: bit 3 clear means remote, bits 0-2 are the target id */
-#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0)
-#define SKX_ILV_TARGET(tgt) ((tgt) & 7)
-
-/*
- * First decode stage: use the SAD entries to find which socket, memory
- * controller and channel own res->addr.
- *
- * On success sets res->dev, res->socket, res->imc and res->channel and
- * returns true. Returns false if the address is out of range, no SAD entry
- * covers it, a remote target cannot be resolved, or the mod3 mode is illegal.
- */
-static bool skx_sad_decode(struct decoded_addr *res)
-{
- struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
- u64 addr = res->addr;
- int i, idx, tgt, lchan, shift;
- u32 sad, ilv;
- u64 limit, prev_limit;
- int remote = 0;
-
- /* Simple sanity check for I/O space or out of range */
- if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
- edac_dbg(0, "Address 0x%llx out of range\n", addr);
- return false;
- }
-
-restart:
- /* Each enabled entry covers [end of previous entry + 1, its own limit] */
- prev_limit = 0;
- for (i = 0; i < SKX_MAX_SAD; i++) {
- SKX_GET_SAD(d, i, sad);
- limit = SKX_SAD_LIMIT(sad);
- if (SKX_SAD_ENABLE(sad)) {
- if (addr >= prev_limit && addr <= limit)
- goto sad_found;
- }
- prev_limit = limit + 1;
- }
- edac_dbg(0, "No SAD entry for 0x%llx\n", addr);
- return false;
-
-sad_found:
- SKX_GET_ILV(d, i, ilv);
-
- /* The interleave mode selects which three address bits index the target list */
- switch (SKX_SAD_INTERLEAVE(sad)) {
- case 0:
- idx = GET_BITFIELD(addr, 6, 8);
- break;
- case 1:
- idx = GET_BITFIELD(addr, 8, 10);
- break;
- case 2:
- idx = GET_BITFIELD(addr, 12, 14);
- break;
- case 3:
- idx = GET_BITFIELD(addr, 30, 32);
- break;
- }
-
- /* ilv holds eight 4-bit targets */
- tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
-
- /* If point to another node, find it and start over */
- if (SKX_ILV_REMOTE(tgt)) {
- /* Only one hop is followed */
- if (remote) {
- edac_dbg(0, "Double remote!\n");
- return false;
- }
- remote = 1;
- list_for_each_entry(d, &skx_edac_list, list) {
- if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
- goto restart;
- }
- edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
- return false;
- }
-
- if (SKX_SAD_MOD3(sad) == 0)
- lchan = SKX_ILV_TARGET(tgt);
- else {
- switch (SKX_SAD_MOD3MODE(sad)) {
- case 0:
- shift = 6;
- break;
- case 1:
- shift = 8;
- break;
- case 2:
- shift = 12;
- break;
- default:
- edac_dbg(0, "illegal mod3mode\n");
- return false;
- }
- switch (SKX_SAD_MOD3ASMOD2(sad)) {
- case 0:
- lchan = (addr >> shift) % 3;
- break;
- case 1:
- lchan = (addr >> shift) % 2;
- break;
- case 2:
- lchan = (addr >> shift) % 2;
- lchan = (lchan << 1) | !lchan;
- break;
- case 3:
- lchan = ((addr >> shift) % 2) << 1;
- break;
- }
- lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
- }
-
- res->dev = d;
- res->socket = d->imc[0].src_id;
- /* mcroute: 3-bit imc fields from bit 0, 2-bit channel fields from bit 18 */
- res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
- res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
-
- edac_dbg(2, "0x%llx: socket=%d imc=%d channel=%d\n",
- res->addr, res->socket, res->imc, res->channel);
- return true;
-}
-
-/* Number of TAD entries scanned by skx_tad_decode() */
-#define SKX_MAX_TAD 8
-
-/* TADBASE and TADWAYNESS are read from channel 0's device; the offset is per channel */
-#define SKX_GET_TADBASE(d, mc, i, reg) \
- pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
-#define SKX_GET_TADWAYNESS(d, mc, i, reg) \
- pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
-#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \
- pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)
-
-/* Field extractors; base, limit and offset are in units of 2^26 bytes */
-#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26)
-#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5)
-#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7)
-#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
-#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26)
-#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11))
-#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1)
-
-/* which bit used for both socket and channel interleave */
-static int skx_granularity[] = { 6, 8, 12, 30 };
-
-/*
- * Remove one level of interleaving from @addr: the part of @addr above bit
- * @shift is divided by @ways, and the low @shift bits of the result are
- * taken from @lowbits.
- */
-static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
-{
-	u64 low_mask = (1ull << shift) - 1;
-	u64 upper = addr >> shift;
-
-	upper /= ways;
-
-	return (upper << shift) | (lowbits & low_mask);
-}
-
-/*
- * Second decode stage: find the TAD entry covering res->addr and turn the
- * address into a channel address by subtracting the channel offset and
- * removing socket and channel interleaving.
- *
- * Sets res->sktways, res->chanways and res->chan_addr. Returns false if no
- * TAD entry covers the address.
- */
-static bool skx_tad_decode(struct decoded_addr *res)
-{
- int i;
- u32 base, wayness, chnilvoffset;
- int skt_interleave_bit, chn_interleave_bit;
- u64 channel_addr;
-
- for (i = 0; i < SKX_MAX_TAD; i++) {
- SKX_GET_TADBASE(res->dev, res->imc, i, base);
- SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
- if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
- goto tad_found;
- }
- edac_dbg(0, "No TAD entry for 0x%llx\n", res->addr);
- return false;
-
-tad_found:
- res->sktways = SKX_TAD_SKTWAYS(wayness);
- res->chanways = SKX_TAD_CHNWAYS(wayness);
- skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
- chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
-
- SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
- channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
-
- if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
- /* Must handle channel first, then socket */
- channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
- res->chanways, channel_addr);
- channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
- res->sktways, channel_addr);
- } else {
- /* Handle socket then channel. Preserve low bits from original address */
- channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
- res->sktways, res->addr);
- channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
- res->chanways, res->addr);
- }
-
- res->chan_addr = channel_addr;
-
- edac_dbg(2, "0x%llx: chan_addr=0x%llx sktways=%d chanways=%d\n",
- res->addr, res->chan_addr, res->sktways, res->chanways);
- return true;
-}
-
-/* Number of RIR entries scanned by skx_rir_decode() */
-#define SKX_MAX_RIR 4
-
-/*
- * Per-channel RIR registers: wayness for entry <i> is at 0x108 + 4*i; the
- * interleave register for way <idx> of entry <i> is at 0x120 + 16*idx + 4*i.
- */
-#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)		\
-	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
-			      0x108 + 4 * (i), &reg)
-/* <idx> is parenthesized so that an expression argument scales correctly */
-#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)		\
-	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
-			      0x120 + 16 * (idx) + 4 * (i), &reg)
-
-/* Field extractors; the limit is in units of 2^29 bytes, the offset in 2^26 */
-#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
-#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
-#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
-#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
-#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
-
-/*
- * Third decode stage: find the RIR entry covering res->chan_addr and derive
- * the DIMM, rank and rank address from it.
- *
- * Sets res->rank_address, res->channel_rank, res->dimm and res->rank.
- * Returns false if no valid RIR entry covers the channel address.
- */
-static bool skx_rir_decode(struct decoded_addr *res)
-{
- int i, idx, chan_rank;
- int shift;
- u32 rirway, rirlv;
- u64 rank_addr, prev_limit = 0, limit;
-
- /* Rank interleave granularity depends on the page policy of DIMM 0 */
- if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
- shift = 6;
- else
- shift = 13;
-
- for (i = 0; i < SKX_MAX_RIR; i++) {
- SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
- limit = SKX_RIR_LIMIT(rirway);
- if (SKX_RIR_VALID(rirway)) {
- if (prev_limit <= res->chan_addr &&
- res->chan_addr <= limit)
- goto rir_found;
- }
- prev_limit = limit;
- }
- edac_dbg(0, "No RIR entry for 0x%llx\n", res->addr);
- return false;
-
-rir_found:
- /* Divide out the rank interleave, keeping the low <shift> bits */
- rank_addr = res->chan_addr >> shift;
- rank_addr /= SKX_RIR_WAYS(rirway);
- rank_addr <<= shift;
- rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
-
- res->rank_address = rank_addr;
- idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
-
- SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
- res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
- chan_rank = SKX_RIR_CHAN_RANK(rirlv);
- res->channel_rank = chan_rank;
- /* Channel rank numbers are grouped four per DIMM */
- res->dimm = chan_rank / 4;
- res->rank = chan_rank % 4;
-
- edac_dbg(2, "0x%llx: dimm=%d rank=%d chan_rank=%d rank_addr=0x%llx\n",
- res->addr, res->dimm, res->rank,
- res->channel_rank, res->rank_address);
- return true;
-}
-
-/*
- * Bit-position tables for skx_mad_decode(): entry i is the rank-address bit
- * that supplies bit i of the row or column, for the close-page, open-page
- * and open-page fine-grain-bank mappings.
- */
-static u8 skx_close_row[] = {
- 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
-};
-static u8 skx_close_column[] = {
- 3, 4, 5, 14, 19, 23, 24, 25, 26, 27
-};
-static u8 skx_open_row[] = {
- 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
-};
-static u8 skx_open_column[] = {
- 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
-};
-static u8 skx_open_fine_column[] = {
- 3, 4, 5, 7, 8, 9, 10, 11, 12, 13
-};
-
-/*
- * Gather @nbits scattered bits of @addr into one value: bit i of the result
- * is bit bits[i] of @addr.
- */
-static int skx_bits(u64 addr, int nbits, u8 *bits)
-{
-	int value = 0;
-	int pos;
-
-	for (pos = 0; pos < nbits; pos++) {
-		if ((addr >> bits[pos]) & 1)
-			value |= 1ull << pos;
-	}
-
-	return value;
-}
-
-/*
- * Build a 2-bit bank value from bits @b0 (low) and @b1 (high) of @addr.
- * When @do_xor is set, the result is XORed with the 2-bit value built the
- * same way from bits @x0 and @x1.
- */
-static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
-{
-	int bank = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
-	int hash;
-
-	if (!do_xor)
-		return bank;
-
-	hash = GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);
-
-	return bank ^ hash;
-}
-
-/*
- * Final decode stage: split r->rank_address into row, column, bank address
- * and bank group, using the bit tables that match the DIMM's page policy
- * and fine-grain-bank setting. Always returns true.
- */
-static bool skx_mad_decode(struct decoded_addr *r)
-{
- struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
- /* Low bank-group bit used in open-page mode */
- int bg0 = dimm->fine_grain_bank ? 6 : 13;
-
- if (dimm->close_pg) {
- r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
- r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
- r->column |= 0x400; /* C10 is autoprecharge, always set */
- r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
- r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
- } else {
- r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
- if (dimm->fine_grain_bank)
- r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
- else
- r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
- r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
- r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
- }
- /* Keep only the low rowbits bits of the row */
- r->row &= (1u << dimm->rowbits) - 1;
-
- edac_dbg(2, "0x%llx: row=0x%x col=0x%x bank_addr=%d bank_group=%d\n",
- r->addr, r->row, r->column, r->bank_address,
- r->bank_group);
- return true;
-}
-
-/*
- * Run the four decode stages (SAD, TAD, RIR, MAD) in order on @res.
- * Stops at the first stage that fails; returns true only if all succeed.
- */
-static bool skx_decode(struct decoded_addr *res)
-{
-	if (!skx_sad_decode(res))
-		return false;
-	if (!skx_tad_decode(res))
-		return false;
-	if (!skx_rir_decode(res))
-		return false;
-
-	return skx_mad_decode(res);
-}
-
-/*
- * Decode res->addr with adxl_decode() instead of the register-based stages.
- *
- * Sets res->socket, res->imc, res->channel and res->dimm from adxl_values[]
- * and writes a " name:0xvalue" list of the decoded components to adxl_msg.
- * Returns false if the address is out of range or adxl_decode() fails.
- */
-static bool skx_adxl_decode(struct decoded_addr *res)
-
-{
- int i, len = 0;
-
- if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
- res->addr < BIT_ULL(32))) {
- edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
- return false;
- }
-
- if (adxl_decode(res->addr, adxl_values)) {
- edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
- return false;
- }
-
- res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
- res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
- res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
- res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
-
- for (i = 0; i < adxl_component_count; i++) {
- /* Components with an all-ones value are left out of the message */
- if (adxl_values[i] == ~0x0ull)
- continue;
-
- len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
- adxl_component_names[i], adxl_values[i]);
- /* Stop once the buffer is full */
- if (MSG_SIZE - len <= 0)
- break;
- }
-
- return true;
-}
-
-/*
- * Format the decoded error described by @m and @res into skx_msg and
- * report it to the EDAC core for @mci.
- *
- * The severity comes from status bit 61 (uncorrected) and mcgstatus bit 0;
- * the operation text comes from bits 4-6 of the error code.
- */
-static void skx_mce_output_error(struct mem_ctl_info *mci,
- const struct mce *m,
- struct decoded_addr *res)
-{
- enum hw_event_mc_err_type tp_event;
- char *type, *optype;
- bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
- bool overflow = GET_BITFIELD(m->status, 62, 62);
- bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
- bool recoverable;
- u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
- u32 mscod = GET_BITFIELD(m->status, 16, 31);
- u32 errcode = GET_BITFIELD(m->status, 0, 15);
- u32 optypenum = GET_BITFIELD(m->status, 4, 6);
-
- recoverable = GET_BITFIELD(m->status, 56, 56);
-
- /* type is assigned below but not read again in this function */
- if (uncorrected_error) {
- core_err_cnt = 1;
- if (ripv) {
- type = "FATAL";
- tp_event = HW_EVENT_ERR_FATAL;
- } else {
- type = "NON_FATAL";
- tp_event = HW_EVENT_ERR_UNCORRECTED;
- }
- } else {
- type = "CORRECTED";
- tp_event = HW_EVENT_ERR_CORRECTED;
- }
-
- /*
- * According to Table 15-9 of the Intel Architecture spec vol 3A,
- * memory errors should fit in this mask:
- * 000f 0000 1mmm cccc (binary)
- * where:
- * f = Correction Report Filtering Bit. If 1, subsequent errors
- * won't be shown
- * mmm = error type
- * cccc = channel
- * If the mask doesn't match, report an error to the parsing logic
- */
- if (!((errcode & 0xef80) == 0x80)) {
- optype = "Can't parse: it is not a mem";
- } else {
- switch (optypenum) {
- case 0:
- optype = "generic undef request error";
- break;
- case 1:
- optype = "memory read error";
- break;
- case 2:
- optype = "memory write error";
- break;
- case 3:
- optype = "addr/cmd error";
- break;
- case 4:
- optype = "memory scrubbing error";
- break;
- default:
- optype = "reserved";
- break;
- }
- }
- /* With ADXL the location text is the one built by skx_adxl_decode() */
- if (adxl_component_count) {
- snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
- overflow ? " OVERFLOW" : "",
- (uncorrected_error && recoverable) ? " recoverable" : "",
- mscod, errcode, adxl_msg);
- } else {
- snprintf(skx_msg, MSG_SIZE,
- "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
- overflow ? " OVERFLOW" : "",
- (uncorrected_error && recoverable) ? " recoverable" : "",
- mscod, errcode,
- res->socket, res->imc, res->rank,
- res->bank_group, res->bank_address, res->row, res->column);
- }
-
- edac_dbg(0, "%s\n", skx_msg);
-
- /* Call the helper to output message */
- edac_mc_handle_error(tp_event, mci, core_err_cnt,
- m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
- res->channel, res->dimm, -1,
- optype, skx_msg);
-}
-
-/*
- * Return the mci of memory controller @lmc on the socket whose source id is
- * @src_id. Returns NULL (after logging) when @lmc is too large or no socket
- * matches; the returned mci itself may also be NULL.
- */
-static struct mem_ctl_info *get_mci(int src_id, int lmc)
-{
-	struct skx_dev *dev;
-
-	if (lmc >= NUM_IMC) {
-		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
-		return NULL;
-	}
-
-	list_for_each_entry(dev, &skx_edac_list, list) {
-		if (dev->imc[0].src_id != src_id)
-			continue;
-
-		return dev->imc[lmc].mci;
-	}
-
-	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
-
-	return NULL;
-}
-
-static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct mce *mce = (struct mce *)data;
- struct decoded_addr res;
- struct mem_ctl_info *mci;
- char *type;
-
- if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
- return NOTIFY_DONE;
-
- /* ignore unless this is memory related with an address */
- if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
- return NOTIFY_DONE;
-
- memset(&res, 0, sizeof(res));
- res.addr = mce->addr;
-
- if (adxl_component_count) {
- if (!skx_adxl_decode(&res))
- return NOTIFY_DONE;
-
- mci = get_mci(res.socket, res.imc);
- } else {
- if (!skx_decode(&res))
- return NOTIFY_DONE;
-
- mci = res.dev->imc[res.imc].mci;
- }
-
- if (!mci)
- return NOTIFY_DONE;
-
- if (mce->mcgstatus & MCG_STATUS_MCIP)
- type = "Exception";
- else
- type = "Event";
-
- skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
-
- skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
- "Bank %d: %016Lx\n", mce->extcpu, type,
- mce->mcgstatus, mce->bank, mce->status);
- skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
- skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
- skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);
-
- skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
- "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
- mce->time, mce->socketid, mce->apicid);
-
- skx_mce_output_error(mci, mce, &res);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block skx_mce_dec = {
- .notifier_call = skx_mce_check_error,
- .priority = MCE_PRIO_EDAC,
-};
-
-#ifdef CONFIG_EDAC_DEBUG
-/*
- * Debug feature.
- * Exercise the address decode logic by writing an address to
- * /sys/kernel/debug/edac/skx_test/addr.
- */
-static struct dentry *skx_test;
-
-static int debugfs_u64_set(void *data, u64 val)
-{
- struct mce m;
-
- pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
-
- memset(&m, 0, sizeof(m));
- /* ADDRV + MemRd + Unknown channel */
- m.status = MCI_STATUS_ADDRV + 0x90;
- /* One corrected error */
- m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
- m.addr = val;
- skx_mce_check_error(NULL, 0, &m);
-
- return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
-
-static void setup_skx_debug(void)
-{
- skx_test = edac_debugfs_create_dir("skx_test");
- if (!skx_test)
- return;
-
- if (!edac_debugfs_create_file("addr", 0200, skx_test,
- NULL, &fops_u64_wo)) {
- debugfs_remove(skx_test);
- skx_test = NULL;
- }
-}
-
-static void teardown_skx_debug(void)
-{
- debugfs_remove_recursive(skx_test);
-}
-#else
-static void setup_skx_debug(void) {}
-static void teardown_skx_debug(void) {}
-#endif /*CONFIG_EDAC_DEBUG*/
-
-static void skx_remove(void)
-{
- int i, j;
- struct skx_dev *d, *tmp;
-
- edac_dbg(0, "\n");
-
- list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
- list_del(&d->list);
- for (i = 0; i < NUM_IMC; i++) {
- skx_unregister_mci(&d->imc[i]);
- for (j = 0; j < NUM_CHANNELS; j++)
- pci_dev_put(d->imc[i].chan[j].cdev);
- }
- pci_dev_put(d->util_all);
- pci_dev_put(d->sad_all);
-
- kfree(d);
- }
-}
-
-static void __init skx_adxl_get(void)
-{
- const char * const *names;
- int i, j;
-
- names = adxl_get_component_names();
- if (!names) {
- skx_printk(KERN_NOTICE, "No firmware support for address translation.");
- skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
- return;
- }
-
- for (i = 0; i < INDEX_MAX; i++) {
- for (j = 0; names[j]; j++) {
- if (!strcmp(component_names[i], names[j])) {
- component_indices[i] = j;
- break;
- }
- }
-
- if (!names[j])
- goto err;
- }
-
- adxl_component_names = names;
- while (*names++)
- adxl_component_count++;
-
- adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
- GFP_KERNEL);
- if (!adxl_values) {
- adxl_component_count = 0;
- return;
- }
-
- adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
- if (!adxl_msg) {
- adxl_component_count = 0;
- kfree(adxl_values);
- }
-
- return;
-err:
- skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
- component_names[i]);
- for (j = 0; names[j]; j++)
- skx_printk(KERN_CONT, "%s ", names[j]);
- skx_printk(KERN_CONT, "\n");
-}
-
-static void __exit skx_adxl_put(void)
-{
- kfree(adxl_values);
- kfree(adxl_msg);
-}
-
-/*
- * skx_init:
- * make sure we are running on the correct cpu model
- * search for all the devices we need
- * check which DIMMs are present.
- */
-static int __init skx_init(void)
-{
- const struct x86_cpu_id *id;
- const struct munit *m;
- const char *owner;
- int rc = 0, i;
- u8 mc = 0, src_id, node_id;
- struct skx_dev *d;
-
- edac_dbg(2, "\n");
-
- owner = edac_get_owner();
- if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
- return -EBUSY;
-
- id = x86_match_cpu(skx_cpuids);
- if (!id)
- return -ENODEV;
-
- rc = skx_get_hi_lo();
- if (rc)
- return rc;
-
- rc = get_all_bus_mappings();
- if (rc < 0)
- goto fail;
- if (rc == 0) {
- edac_dbg(2, "No memory controllers found\n");
- return -ENODEV;
- }
-
- for (m = skx_all_munits; m->did; m++) {
- rc = get_all_munits(m);
- if (rc < 0)
- goto fail;
- if (rc != m->per_socket * skx_num_sockets) {
- edac_dbg(2, "Expected %d, got %d of 0x%x\n",
- m->per_socket * skx_num_sockets, rc, m->did);
- rc = -ENODEV;
- goto fail;
- }
- }
-
- list_for_each_entry(d, &skx_edac_list, list) {
- src_id = get_src_id(d);
- node_id = skx_get_node_id(d);
- edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
- for (i = 0; i < NUM_IMC; i++) {
- d->imc[i].mc = mc++;
- d->imc[i].lmc = i;
- d->imc[i].src_id = src_id;
- d->imc[i].node_id = node_id;
- rc = skx_register_mci(&d->imc[i]);
- if (rc < 0)
- goto fail;
- }
- }
-
- skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
- if (!skx_msg) {
- rc = -ENOMEM;
- goto fail;
- }
-
- if (nvdimm_count)
- skx_adxl_get();
-
- /* Ensure that the OPSTATE is set correctly for POLL or NMI */
- opstate_init();
-
- setup_skx_debug();
-
- mce_register_decode_chain(&skx_mce_dec);
-
- return 0;
-fail:
- skx_remove();
- return rc;
-}
-
-static void __exit skx_exit(void)
-{
- edac_dbg(2, "\n");
- mce_unregister_decode_chain(&skx_mce_dec);
- teardown_skx_debug();
- if (nvdimm_count)
- skx_adxl_put();
- kfree(skx_msg);
- skx_remove();
-}
-
-module_init(skx_init);
-module_exit(skx_exit);
-
-module_param(edac_op_state, int, 0444);
-MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Tony Luck");
-MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");