From 50cc18fcd3053fb46a09db5a39e6516e9560f765 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:22 -0800 Subject: PCI/AER: Write AER Capability only when we control it If an OS has not been granted AER control via _OSC, it should not make changes to PCI_ERR_ROOT_COMMAND and PCI_ERR_ROOT_STATUS related registers. Per section 4.5.1 of the System Firmware Intermediary (SFI) _OSC and DPC Updates ECN [1], this bit also covers these aspects of the PCI Express Advanced Error Reporting. Based on the above and earlier discussion [2], make the following changes: Add a check for the native case (i.e., AER control via _OSC) Note that the previous "clear, reset, enable" order suggests that the reset might cause errors that we should ignore. After this commit, those errors (if any) will remain logged in the PCI_ERR_ROOT_STATUS register. [1] System Firmware Intermediary (SFI) _OSC and DPC Updates ECN, Feb 24, 2020, affecting PCI Firmware Specification, Rev. 3.2 https://members.pcisig.com/wg/PCI-SIG/document/14076 [2] https://lore.kernel.org/linux-pci/20201020162820.GA370938@bjorn-Precision-5520/ Link: https://lore.kernel.org/r/20201121001036.8560-2-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 65dff5f3457a..6fe2d4ef0635 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1361,23 +1361,26 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) u32 reg32; int rc; - - /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + if (pcie_aer_is_native(dev)) { + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + } rc = pci_bus_error_reset(dev); - pci_info(dev, "Root Port link has been reset\n"); + pci_info(dev, "Root Port link has been reset (%d)\n", rc); - /* Clear Root Error Status */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); + if (pcie_aer_is_native(dev)) { + /* Clear Root Error Status */ + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); - /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + } return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -- cgit v1.2.3 From c9d659b60770db94b898f94947192a94bbf95c5c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:23 -0800 Subject: PCI/ERR: Bind RCEC devices to the Root Port driver If a Root Complex Integrated Endpoint (RCiEP) is implemented, it may signal errors through a Root Complex Event Collector (RCEC). Each RCiEP must be associated with no more than one RCEC. For an RCEC (which is technically not a Bridge), error messages "received" from associated RCiEPs must be enabled for "transmission" in order to cause a System Error via the Root Control register or (when the Advanced Error Reporting Capability is present) reporting via the Root Error Command register and logging in the Root Error Status register and Error Source Identification register. Given the commonality with Root Ports and the need to also support AER and PME services for RCECs, extend the Root Port driver to support RCEC devices by adding the RCEC Class ID to the driver structure. Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-3-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Qiuxu Zhuo Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pcie/portdrv_pci.c | 5 ++++- include/linux/pci_ids.h | 1 + include/uapi/linux/pci_regs.h | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 3a3ce40ae1ab..4d880679b9b1 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -106,7 +106,8 @@ static int pcie_portdrv_probe(struct pci_dev *dev, if (!pci_is_pcie(dev) || ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM))) + (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC))) return -ENODEV; status = pcie_port_device_register(dev); @@ -195,6 +196,8 @@ static const struct pci_device_id port_pci_ids[] = { { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x00), ~0) }, /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ { PCI_DEVICE_CLASS(((PCI_CLASS_BRIDGE_PCI << 8) | 0x01), ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, { }, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1ab1e24bcbce..d8156a5dbee8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -81,6 +81,7 @@ #define PCI_CLASS_SYSTEM_RTC 0x0803 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 #define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_RCEC 0x0807 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_BASE_CLASS_INPUT 0x09 diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index a95d55f9f257..bccd3e35cb65 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -831,6 +831,13 @@ #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ #define PCI_EXT_CAP_PWR_SIZEOF 16 +/* Root Complex Event Collector Endpoint Association */ +#define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ +#define PCI_RCEC_BUSN 8 /* RCEC Associated Bus Numbers */ +#define PCI_RCEC_BUSN_REG_VER 0x02 /* Least version with BUSN present */ +#define PCI_RCEC_BUSN_NEXT(x) (((x) >> 8) & 0xff) +#define PCI_RCEC_BUSN_LAST(x) (((x) >> 16) & 0xff) + /* Vendor-Specific (VSEC, PCI_EXT_CAP_ID_VNDR) */ #define PCI_VNDR_HEADER 4 /* Vendor-Specific Header */ #define PCI_VNDR_HEADER_ID(x) ((x) & 0xffff) -- cgit v1.2.3 From 90655631988f8f501529e6de5f13614389717ead Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:24 -0800 Subject: PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities() Extend support for Root Complex Event Collectors by decoding and caching the RCEC Endpoint Association Extended Capabilities when enumerating. Use that cached information for later error source reporting. See PCIe r5.0, sec 7.9.10. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-4-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 17 ++++++++++++++ drivers/pci/pcie/Makefile | 2 +- drivers/pci/pcie/rcec.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/probe.c | 2 ++ include/linux/pci.h | 4 ++++ 5 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 drivers/pci/pcie/rcec.c diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f86cae9aa1f4..12dcad8f3635 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -448,6 +448,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); #endif /* CONFIG_PCIEAER */ +#ifdef CONFIG_PCIEPORTBUS +/* Cached RCEC Endpoint Association */ +struct rcec_ea { + u8 nextbusn; + u8 lastbusn; + u32 bitmap; +}; +#endif + #ifdef CONFIG_PCIE_DPC void pci_save_dpc_state(struct pci_dev *dev); void pci_restore_dpc_state(struct pci_dev *dev); @@ -460,6 +469,14 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {} static inline void pci_dpc_init(struct pci_dev *pdev) {} #endif +#ifdef CONFIG_PCIEPORTBUS +void pci_rcec_init(struct pci_dev *dev); +void pci_rcec_exit(struct pci_dev *dev); +#else +static inline void pci_rcec_init(struct pci_dev *dev) {} +static inline void pci_rcec_exit(struct pci_dev *dev) {} +#endif + #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 68da9280ff11..d9697892fa3e 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c new file mode 100644 index 000000000000..038e9d706d5f --- /dev/null +++ b/drivers/pci/pcie/rcec.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Root Complex Event Collector Support + * + * Authors: + * Sean V Kelley + * Qiuxu Zhuo + * + * Copyright (C) 2020 Intel Corp. + */ + +#include +#include +#include + +#include "../pci.h" + +void pci_rcec_init(struct pci_dev *dev) +{ + struct rcec_ea *rcec_ea; + u32 rcec, hdr, busn; + u8 ver; + + /* Only for Root Complex Event Collectors */ + if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC) + return; + + rcec = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_RCEC); + if (!rcec) + return; + + rcec_ea = kzalloc(sizeof(*rcec_ea), GFP_KERNEL); + if (!rcec_ea) + return; + + pci_read_config_dword(dev, rcec + PCI_RCEC_RCIEP_BITMAP, + &rcec_ea->bitmap); + + /* Check whether RCEC BUSN register is present */ + pci_read_config_dword(dev, rcec, &hdr); + ver = PCI_EXT_CAP_VER(hdr); + if (ver >= PCI_RCEC_BUSN_REG_VER) { + pci_read_config_dword(dev, rcec + PCI_RCEC_BUSN, &busn); + rcec_ea->nextbusn = PCI_RCEC_BUSN_NEXT(busn); + rcec_ea->lastbusn = PCI_RCEC_BUSN_LAST(busn); + } else { + /* Avoid later ver check by setting nextbusn */ + rcec_ea->nextbusn = 0xff; + rcec_ea->lastbusn = 0x00; + } + + dev->rcec_ea = rcec_ea; +} + +void pci_rcec_exit(struct pci_dev *dev) +{ + kfree(dev->rcec_ea); + dev->rcec_ea = NULL; +} diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4289030b0fff..f22e1451d65d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2216,6 +2216,7 @@ static void pci_configure_device(struct pci_dev *dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_aer_exit(dev); + pci_rcec_exit(dev); pci_vpd_release(dev); pci_iov_release(dev); pci_free_cap_save_buffers(dev); @@ -2415,6 +2416,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_ptm_init(dev); /* Precision Time Measurement */ pci_aer_init(dev); /* Advanced Error Reporting */ pci_dpc_init(dev); /* Downstream Port Containment */ + pci_rcec_init(dev); /* Root Complex Event Collector */ pcie_report_downtraining(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..f8c927fd0602 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -304,6 +304,7 @@ struct pcie_link_state; struct pci_vpd; struct pci_sriov; struct pci_p2pdma; +struct rcec_ea; /* The pci_dev structure describes PCI devices */ struct pci_dev { @@ -326,6 +327,9 @@ struct pci_dev { #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ struct aer_stats *aer_stats; /* AER stats for this device */ +#endif +#ifdef CONFIG_PCIEPORTBUS + struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 8f1bbfbc3596d401b60d1562b27ec28c2724f60d Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:25 -0800 Subject: PCI/ERR: Rename reset_link() to reset_subordinates() reset_link() appears to be misnamed. The point is to reset any devices below a given bridge, so rename it to reset_subordinates() to make it clear that we are passing a bridge with the intent to reset the devices below it. Link: https://lore.kernel.org/r/20201121001036.8560-5-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Jonathan Cameron --- drivers/pci/pci.h | 4 ++-- drivers/pci/pcie/err.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 12dcad8f3635..3c4570a3058f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -572,8 +572,8 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - pci_channel_state_t state, - pci_ers_result_t (*reset_link)(struct pci_dev *pdev)); + pci_channel_state_t state, + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index c543f419d8f9..db149c6ce4fb 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -147,8 +147,8 @@ out: } pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - pci_channel_state_t state, - pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) + pci_channel_state_t state, + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) { pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; struct pci_bus *bus; @@ -165,9 +165,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(dev, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bus(bus, report_frozen_detected, &status); - status = reset_link(dev); + status = reset_subordinates(dev); if (status != PCI_ERS_RESULT_RECOVERED) { - pci_warn(dev, "link reset failed\n"); + pci_warn(dev, "subordinate device reset failed\n"); goto failed; } } else { -- cgit v1.2.3 From 5d69dcc9f839bd2d5cac7a098712f52149e1673f Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:26 -0800 Subject: PCI/ERR: Simplify by using pci_upstream_bridge() Use pci_upstream_bridge() in place of dev->bus->self. No functional change intended. Link: https://lore.kernel.org/r/20201121001036.8560-6-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Jonathan Cameron --- drivers/pci/pcie/err.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index db149c6ce4fb..05f61da5ed9d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -159,7 +159,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, */ if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) - dev = dev->bus->self; + dev = pci_upstream_bridge(dev); bus = dev->subordinate; pci_dbg(dev, "broadcast error_detected message\n"); -- cgit v1.2.3 From 480ef7cb9fcebda7b28cbed4f6cdcf0a02f4a6ca Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:27 -0800 Subject: PCI/ERR: Simplify by computing pci_pcie_type() once Instead of calling pci_pcie_type(dev) twice, call it once and save the result. No functional change intended. Link: https://lore.kernel.org/r/20201121001036.8560-7-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Acked-by: Jonathan Cameron --- drivers/pci/pcie/aer.c | 5 +++-- drivers/pci/pcie/err.c | 5 +++-- drivers/pci/pcie/portdrv_pci.c | 9 +++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 6fe2d4ef0635..0ba0b47ae751 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1034,6 +1034,7 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); */ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { + int type = pci_pcie_type(dev); int aer = dev->aer_cap; int temp; @@ -1052,8 +1053,8 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) &info->mask); if (!(info->status & ~info->mask)) return 0; - } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || + } else if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM || info->severity == AER_NONFATAL) { /* Link is still healthy for IO reads */ diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 05f61da5ed9d..7a5af873d8bc 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -150,6 +150,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_channel_state_t state, pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) { + int type = pci_pcie_type(dev); pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; struct pci_bus *bus; @@ -157,8 +158,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, * Error recovery runs on all subordinates of the first downstream port. * If the downstream port detected the error, it is cleared at the end. */ - if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) + if (!(type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM)) dev = pci_upstream_bridge(dev); bus = dev->subordinate; diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 4d880679b9b1..ff9517ee92b3 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -101,13 +101,14 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { static int pcie_portdrv_probe(struct pci_dev *dev, const struct pci_device_id *id) { + int type = pci_pcie_type(dev); int status; if (!pci_is_pcie(dev) || - ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) && - (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC))) + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) return -ENODEV; status = pcie_port_device_register(dev); -- cgit v1.2.3 From 0791721d800790e6e533bd8467df67f0dc4f2fec Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:28 -0800 Subject: PCI/ERR: Use "bridge" for clarity in pcie_do_recovery() pcie_do_recovery() may be called with "dev" being either a bridge (Root Port or Switch Downstream Port) or an Endpoint. The bulk of the function deals with the bridge, so if we start with an Endpoint, we reset "dev" to be the bridge leading to it. For clarity, replace "dev" in the body of the function with "bridge". No functional change intended. Link: https://lore.kernel.org/r/20201121001036.8560-8-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Jonathan Cameron --- drivers/pci/pcie/err.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 7a5af873d8bc..46a5b84f8842 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -151,24 +151,27 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) { int type = pci_pcie_type(dev); - pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct pci_dev *bridge; struct pci_bus *bus; + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; /* - * Error recovery runs on all subordinates of the first downstream port. - * If the downstream port detected the error, it is cleared at the end. + * Error recovery runs on all subordinates of the bridge. If the + * bridge detected the error, it is cleared at the end. */ if (!(type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM)) - dev = pci_upstream_bridge(dev); - bus = dev->subordinate; + bridge = pci_upstream_bridge(dev); + else + bridge = dev; - pci_dbg(dev, "broadcast error_detected message\n"); + bus = bridge->subordinate; + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bus(bus, report_frozen_detected, &status); - status = reset_subordinates(dev); + status = reset_subordinates(bridge); if (status != PCI_ERS_RESULT_RECOVERED) { - pci_warn(dev, "subordinate device reset failed\n"); + pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } } else { @@ -177,7 +180,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, if (status == PCI_ERS_RESULT_CAN_RECOVER) { status = PCI_ERS_RESULT_RECOVERED; - pci_dbg(dev, "broadcast mmio_enabled message\n"); + pci_dbg(bridge, "broadcast mmio_enabled message\n"); pci_walk_bus(bus, report_mmio_enabled, &status); } @@ -188,27 +191,27 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, * drivers' slot_reset callbacks? */ status = PCI_ERS_RESULT_RECOVERED; - pci_dbg(dev, "broadcast slot_reset message\n"); + pci_dbg(bridge, "broadcast slot_reset message\n"); pci_walk_bus(bus, report_slot_reset, &status); } if (status != PCI_ERS_RESULT_RECOVERED) goto failed; - pci_dbg(dev, "broadcast resume message\n"); + pci_dbg(bridge, "broadcast resume message\n"); pci_walk_bus(bus, report_resume, &status); - if (pcie_aer_is_native(dev)) - pcie_clear_device_status(dev); - pci_aer_clear_nonfatal_status(dev); - pci_info(dev, "device recovery successful\n"); + if (pcie_aer_is_native(bridge)) + pcie_clear_device_status(bridge); + pci_aer_clear_nonfatal_status(bridge); + pci_info(bridge, "device recovery successful\n"); return status; failed: - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ - pci_info(dev, "device recovery failed\n"); + pci_info(bridge, "device recovery failed\n"); return status; } -- cgit v1.2.3 From 3d7d8fc78f4b504819882278fcfe10784eb985fa Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:29 -0800 Subject: PCI/ERR: Avoid negated conditional for clarity Reverse the sense of the Root Port/Downstream Port conditional for clarity. No functional change intended. Link: https://lore.kernel.org/r/20201121001036.8560-9-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Jonathan Cameron --- drivers/pci/pcie/err.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 46a5b84f8842..931e75f2549d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -159,11 +159,11 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, * Error recovery runs on all subordinates of the bridge. If the * bridge detected the error, it is cleared at the end. */ - if (!(type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_DOWNSTREAM)) - bridge = pci_upstream_bridge(dev); - else + if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_DOWNSTREAM) bridge = dev; + else + bridge = pci_upstream_bridge(dev); bus = bridge->subordinate; pci_dbg(bridge, "broadcast error_detected message\n"); -- cgit v1.2.3 From 05e9ae19ab83881a0f33025bd1288e41e552a34b Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:30 -0800 Subject: PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery() Consolidate subordinate bus checks with pci_walk_bus() into pci_walk_bridge() for walking below potentially AER affected bridges. Link: https://lore.kernel.org/r/20201121001036.8560-10-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/err.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 931e75f2549d..8b53aecdb43d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -146,13 +146,30 @@ out: return 0; } +/** + * pci_walk_bridge - walk bridges potentially AER affected + * @bridge: bridge which may be a Port + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * If the device provided is a bridge, walk the subordinate bus, including + * any bridged devices on buses under this bus. Call the provided callback + * on each device found. + */ +static void pci_walk_bridge(struct pci_dev *bridge, + int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + if (bridge->subordinate) + pci_walk_bus(bridge->subordinate, cb, userdata); +} + pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_channel_state_t state, pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) { int type = pci_pcie_type(dev); struct pci_dev *bridge; - struct pci_bus *bus; pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; /* @@ -165,23 +182,22 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); - bus = bridge->subordinate; pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { - pci_walk_bus(bus, report_frozen_detected, &status); + pci_walk_bridge(bridge, report_frozen_detected, &status); status = reset_subordinates(bridge); if (status != PCI_ERS_RESULT_RECOVERED) { pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } } else { - pci_walk_bus(bus, report_normal_detected, &status); + pci_walk_bridge(bridge, report_normal_detected, &status); } if (status == PCI_ERS_RESULT_CAN_RECOVER) { status = PCI_ERS_RESULT_RECOVERED; pci_dbg(bridge, "broadcast mmio_enabled message\n"); - pci_walk_bus(bus, report_mmio_enabled, &status); + pci_walk_bridge(bridge, report_mmio_enabled, &status); } if (status == PCI_ERS_RESULT_NEED_RESET) { @@ -192,14 +208,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, */ status = PCI_ERS_RESULT_RECOVERED; pci_dbg(bridge, "broadcast slot_reset message\n"); - pci_walk_bus(bus, report_slot_reset, &status); + pci_walk_bridge(bridge, report_slot_reset, &status); } if (status != PCI_ERS_RESULT_RECOVERED) goto failed; pci_dbg(bridge, "broadcast resume message\n"); - pci_walk_bus(bus, report_resume, &status); + pci_walk_bridge(bridge, report_resume, &status); if (pcie_aer_is_native(bridge)) pcie_clear_device_status(bridge); -- cgit v1.2.3 From aa344bc8b727b47b4350b59d8166216a3f351e55 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Tue, 24 Nov 2020 10:55:30 -0600 Subject: PCI/ERR: Clear AER status only when we control AER In some cases a bridge may not exist as the hardware controlling may be handled only by firmware and so is not visible to the OS. This scenario is also possible in future use cases involving non-native use of RCECs by firmware. In this scenario, we expect the platform to retain control of the bridge and to clear error status itself. Clear error status only when the OS has native control of AER. Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/err.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 8b53aecdb43d..45a0ce95632a 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -171,6 +171,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, int type = pci_pcie_type(dev); struct pci_dev *bridge; pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); /* * Error recovery runs on all subordinates of the bridge. If the @@ -217,9 +218,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(bridge, "broadcast resume message\n"); pci_walk_bridge(bridge, report_resume, &status); - if (pcie_aer_is_native(bridge)) + /* + * If we have native control of AER, clear error status in the Root + * Port or Downstream Port that signaled the error. If the + * platform retained control of AER, it is responsible for clearing + * this status. In that case, the signaling device may not even be + * visible to the OS. + */ + if (host->native_aer || pcie_ports_native) { pcie_clear_device_status(bridge); - pci_aer_clear_nonfatal_status(bridge); + pci_aer_clear_nonfatal_status(bridge); + } pci_info(bridge, "device recovery successful\n"); return status; -- cgit v1.2.3 From a175102b0a82fc57853a9e611c42d1d6172e5180 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Wed, 2 Dec 2020 11:26:29 -0600 Subject: PCI/ERR: Recover from RCEC AER errors A Root Complex Event Collector (RCEC) collects and signals AER errors that were detected by Root Complex Integrated Endpoints (RCiEPs), but it may also signal errors it detects itself. This is analogous to errors detected and signaled by a Root Port. Update the AER service driver to claim RCECs in addition to Root Ports. Add support for handling RCEC-detected AER errors. This does not include handling RCiEP-detected errors that are signaled by the RCEC. Note that we expect these errors only from the native AER and APEI paths, not from DPC or EDR. [bhelgaas: split from combined RCEC/RCiEP patch, commit log] Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 58 +++++++++++++++++++++++++++++++++++--------------- drivers/pci/pcie/err.c | 19 +++++++++++++---- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 0ba0b47ae751..84a785fa272c 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -300,7 +300,8 @@ int pci_aer_raw_clear_status(struct pci_dev *dev) return -EIO; port_type = pci_pcie_type(dev); - if (port_type == PCI_EXP_TYPE_ROOT_PORT) { + if (port_type == PCI_EXP_TYPE_ROOT_PORT || + port_type == PCI_EXP_TYPE_RC_EC) { pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status); pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status); } @@ -595,7 +596,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, if ((a == &dev_attr_aer_rootport_total_err_cor.attr || a == &dev_attr_aer_rootport_total_err_fatal.attr || a == &dev_attr_aer_rootport_total_err_nonfatal.attr) && - pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) + ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC))) return 0; return a->mode; @@ -1206,6 +1208,7 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data) int type = pci_pcie_type(dev); if ((type == PCI_EXP_TYPE_ROOT_PORT) || + (type == PCI_EXP_TYPE_RC_EC) || (type == PCI_EXP_TYPE_UPSTREAM) || (type == PCI_EXP_TYPE_DOWNSTREAM)) { if (enable) @@ -1330,6 +1333,11 @@ static int aer_probe(struct pcie_device *dev) struct device *device = &dev->device; struct pci_dev *port = dev->port; + /* Limit to Root Ports or Root Complex Event Collectors */ + if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && + (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) + return -ENODEV; + rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); if (!rpc) return -ENOMEM; @@ -1351,36 +1359,52 @@ static int aer_probe(struct pcie_device *dev) } /** - * aer_root_reset - reset link on Root Port - * @dev: pointer to Root Port's pci_dev data structure + * aer_root_reset - reset Root Port hierarchy or RCEC + * @dev: pointer to Root Port or RCEC * - * Invoked by Port Bus driver when performing link reset at Root Port. + * Invoked by Port Bus driver when performing reset. */ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { - int aer = dev->aer_cap; + int type = pci_pcie_type(dev); + struct pci_dev *root; + int aer; + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); u32 reg32; int rc; - if (pcie_aer_is_native(dev)) { + root = dev; /* device with Root Error registers */ + aer = root->aer_cap; + + if ((host->native_aer || pcie_ports_native) && aer) { /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); } - rc = pci_bus_error_reset(dev); - pci_info(dev, "Root Port link has been reset (%d)\n", rc); + if (type == PCI_EXP_TYPE_RC_EC) { + if (pcie_has_flr(dev)) { + rc = pcie_flr(dev); + pci_info(dev, "has been reset (%d)\n", rc); + } else { + pci_info(dev, "not reset (no FLR support)\n"); + rc = -ENOTTY; + } + } else { + rc = pci_bus_error_reset(dev); + pci_info(dev, "Root Port link has been reset (%d)\n", rc); + } - if (pcie_aer_is_native(dev)) { + if ((host->native_aer || pcie_ports_native) && aer) { /* Clear Root Error Status */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32); /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32); + pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, ®32); reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32); + pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); } return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -1388,7 +1412,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) static struct pcie_port_service_driver aerdriver = { .name = "aer", - .port_type = PCI_EXP_TYPE_ROOT_PORT, + .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 45a0ce95632a..87a2dc8d17f8 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -148,13 +148,16 @@ out: /** * pci_walk_bridge - walk bridges potentially AER affected - * @bridge: bridge which may be a Port + * @bridge: bridge which may be a Port or an RCEC * @cb: callback to be called for each device found * @userdata: arbitrary pointer to be passed to callback * * If the device provided is a bridge, walk the subordinate bus, including * any bridged devices on buses under this bus. Call the provided callback * on each device found. + * + * If the device provided has no subordinate bus, e.g., an RCEC, call the + * callback on the device itself. */ static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *), @@ -162,6 +165,8 @@ static void pci_walk_bridge(struct pci_dev *bridge, { if (bridge->subordinate) pci_walk_bus(bridge->subordinate, cb, userdata); + else + cb(bridge, userdata); } pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, @@ -174,11 +179,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); /* - * Error recovery runs on all subordinates of the bridge. If the - * bridge detected the error, it is cleared at the end. + * If the error was detected by a Root Port, Downstream Port, or + * RCEC, recovery runs on the device itself. For Ports, that also + * includes any subordinate devices. + * + * If it was detected by another device (Endpoint, etc), recovery + * runs on the device and anything else under the same Port, i.e., + * everything under "bridge". */ if (type == PCI_EXP_TYPE_ROOT_PORT || - type == PCI_EXP_TYPE_DOWNSTREAM) + type == PCI_EXP_TYPE_DOWNSTREAM || + type == PCI_EXP_TYPE_RC_EC) bridge = dev; else bridge = pci_upstream_bridge(dev); -- cgit v1.2.3 From 507b460f814458605c47b0ed03c11e49a712fc08 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:32 -0800 Subject: PCI/ERR: Add pcie_link_rcec() to associate RCiEPs A Root Complex Event Collector terminates error and PME messages from associated RCiEPs. Use the RCEC Endpoint Association Extended Capability to identify associated RCiEPs. Link the associated RCiEPs as the RCECs are enumerated. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-12-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pci.h | 2 + drivers/pci/pcie/portdrv_pci.c | 3 ++ drivers/pci/pcie/rcec.c | 94 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 4 files changed, 100 insertions(+) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 3c4570a3058f..ae2ee4df1cff 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -472,9 +472,11 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {} #ifdef CONFIG_PCIEPORTBUS void pci_rcec_init(struct pci_dev *dev); void pci_rcec_exit(struct pci_dev *dev); +void pcie_link_rcec(struct pci_dev *rcec); #else static inline void pci_rcec_init(struct pci_dev *dev) {} static inline void pci_rcec_exit(struct pci_dev *dev) {} +static inline void pcie_link_rcec(struct pci_dev *rcec) {} #endif #ifdef CONFIG_PCI_ATS diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index ff9517ee92b3..0b250bc5f405 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -111,6 +111,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev, (type != PCI_EXP_TYPE_RC_EC))) return -ENODEV; + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + status = pcie_port_device_register(dev); if (status) return status; diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c index 038e9d706d5f..cdec277cbd62 100644 --- a/drivers/pci/pcie/rcec.c +++ b/drivers/pci/pcie/rcec.c @@ -15,6 +15,100 @@ #include "../pci.h" +struct walk_rcec_data { + struct pci_dev *rcec; + int (*user_callback)(struct pci_dev *dev, void *data); + void *user_data; +}; + +static bool rcec_assoc_rciep(struct pci_dev *rcec, struct pci_dev *rciep) +{ + unsigned long bitmap = rcec->rcec_ea->bitmap; + unsigned int devn; + + /* An RCiEP found on a different bus in range */ + if (rcec->bus->number != rciep->bus->number) + return true; + + /* Same bus, so check bitmap */ + for_each_set_bit(devn, &bitmap, 32) + if (devn == rciep->devfn) + return true; + + return false; +} + +static int link_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) { + dev->rcec = rcec; + pci_dbg(dev, "PME & error events signaled via %s\n", + pci_name(rcec)); + } + + return 0; +} + +static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), + void *userdata) +{ + struct walk_rcec_data *rcec_data = userdata; + struct pci_dev *rcec = rcec_data->rcec; + u8 nextbusn, lastbusn; + struct pci_bus *bus; + unsigned int bnr; + + if (!rcec->rcec_ea) + return; + + /* Walk own bus for bitmap based association */ + pci_walk_bus(rcec->bus, cb, rcec_data); + + nextbusn = rcec->rcec_ea->nextbusn; + lastbusn = rcec->rcec_ea->lastbusn; + + /* All RCiEP devices are on the same bus as the RCEC */ + if (nextbusn == 0xff && lastbusn == 0x00) + return; + + for (bnr = nextbusn; bnr <= lastbusn; bnr++) { + /* No association indicated (PCIe 5.0-1, 7.9.10.3) */ + if (bnr == rcec->bus->number) + continue; + + bus = pci_find_bus(pci_domain_nr(rcec->bus), bnr); + if (!bus) + continue; + + /* Find RCiEP devices on the given bus ranges */ + pci_walk_bus(bus, cb, rcec_data); + } +} + +/** + * pcie_link_rcec - Link RCiEP devices associated with RCEC. + * @rcec: RCEC whose RCiEP devices should be linked. + * + * Link the given RCEC to each RCiEP device found. + */ +void pcie_link_rcec(struct pci_dev *rcec) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = NULL; + rcec_data.user_data = NULL; + + walk_rcec(link_rcec_helper, &rcec_data); +} + void pci_rcec_init(struct pci_dev *dev) { struct rcec_ea *rcec_ea; diff --git a/include/linux/pci.h b/include/linux/pci.h index f8c927fd0602..7c7d2d23e8a3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -330,6 +330,7 @@ struct pci_dev { #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ + struct pci_dev *rcec; /* Associated RCEC device */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 5790862255028c831761e13014ee87a06df828f1 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:33 -0800 Subject: PCI/ERR: Recover from RCiEP AER errors Add support for handling AER errors detected by Root Complex Integrated Endpoints (RCiEPs). These errors are signaled to software natively via a Root Complex Event Collector (RCEC) or non-natively via ACPI APEI if the platform retains control of AER or uses a non-standard RCEC-like device. When recovering from RCiEP errors, the Root Error Command and Status registers are in the AER Capability of an associated RCEC (if any), not in a Root Port. In the non-native case, the platform is responsible for those registers and we can't touch them. [bhelgaas: commit log, etc] Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-13-sean.v.kelley@intel.com Signed-off-by: Sean V Kelley Signed-off-by: Qiuxu Zhuo Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 24 +++++++++++++++++++----- drivers/pci/pcie/err.c | 15 ++++++++------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 84a785fa272c..e682df536be5 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1359,8 +1359,8 @@ static int aer_probe(struct pcie_device *dev) } /** - * aer_root_reset - reset Root Port hierarchy or RCEC - * @dev: pointer to Root Port or RCEC + * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP + * @dev: pointer to Root Port, RCEC, or RCiEP * * Invoked by Port Bus driver when performing reset. */ @@ -1373,8 +1373,22 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) u32 reg32; int rc; - root = dev; /* device with Root Error registers */ - aer = root->aer_cap; + /* + * Only Root Ports and RCECs have AER Root Command and Root Status + * registers. If "dev" is an RCiEP, the relevant registers are in + * the RCEC. + */ + if (type == PCI_EXP_TYPE_RC_END) + root = dev->rcec; + else + root = dev; + + /* + * If the platform retained control of AER, an RCiEP may not have + * an RCEC visible to us, so dev->rcec ("root") may be NULL. In + * that case, firmware is responsible for these registers. + */ + aer = root ? root->aer_cap : 0; if ((host->native_aer || pcie_ports_native) && aer) { /* Disable Root's interrupt in response to error messages */ @@ -1383,7 +1397,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32); } - if (type == PCI_EXP_TYPE_RC_EC) { + if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) { if (pcie_has_flr(dev)) { rc = pcie_flr(dev); pci_info(dev, "has been reset (%d)\n", rc); diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 87a2dc8d17f8..510f31f0ef6d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -148,7 +148,7 @@ out: /** * pci_walk_bridge - walk bridges potentially AER affected - * @bridge: bridge which may be a Port or an RCEC + * @bridge: bridge which may be a Port, an RCEC, or an RCiEP * @cb: callback to be called for each device found * @userdata: arbitrary pointer to be passed to callback * @@ -156,8 +156,8 @@ out: * any bridged devices on buses under this bus. Call the provided callback * on each device found. * - * If the device provided has no subordinate bus, e.g., an RCEC, call the - * callback on the device itself. + * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, + * call the callback on the device itself. */ static void pci_walk_bridge(struct pci_dev *bridge, int (*cb)(struct pci_dev *, void *), @@ -179,9 +179,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); /* - * If the error was detected by a Root Port, Downstream Port, or - * RCEC, recovery runs on the device itself. For Ports, that also - * includes any subordinate devices. + * If the error was detected by a Root Port, Downstream Port, RCEC, + * or RCiEP, recovery runs on the device itself. For Ports, that + * also includes any subordinate devices. * * If it was detected by another device (Endpoint, etc), recovery * runs on the device and anything else under the same Port, i.e., @@ -189,7 +189,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, */ if (type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM || - type == PCI_EXP_TYPE_RC_EC) + type == PCI_EXP_TYPE_RC_EC || + type == PCI_EXP_TYPE_RC_END) bridge = dev; else bridge = pci_upstream_bridge(dev); -- cgit v1.2.3 From af113553d9610b2d811d05da96263b4f666f44f0 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:34 -0800 Subject: PCI/AER: Add pcie_walk_rcec() to RCEC AER handling Root Complex Event Collectors (RCEC) appear as peers to Root Ports and also have the AER capability. In addition, actions need to be taken for associated RCiEPs. In such cases the RCECs will need to be walked in order to find and act upon their respective RCiEPs. Extend the existing ability to link the RCECs with a walking function pcie_walk_rcec(). Add RCEC support to the current AER service driver and attach the AER service driver to the RCEC device. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-14-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pci.h | 6 ++++++ drivers/pci/pcie/aer.c | 15 +++++++++++---- drivers/pci/pcie/rcec.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ae2ee4df1cff..e988cc930d0b 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -473,10 +473,16 @@ static inline void pci_dpc_init(struct pci_dev *pdev) {} void pci_rcec_init(struct pci_dev *dev); void pci_rcec_exit(struct pci_dev *dev); void pcie_link_rcec(struct pci_dev *rcec); +void pcie_walk_rcec(struct pci_dev *rcec, + int (*cb)(struct pci_dev *, void *), + void *userdata); #else static inline void pci_rcec_init(struct pci_dev *dev) {} static inline void pci_rcec_exit(struct pci_dev *dev) {} static inline void pcie_link_rcec(struct pci_dev *rcec) {} +static inline void pcie_walk_rcec(struct pci_dev *rcec, + int (*cb)(struct pci_dev *, void *), + void *userdata) {} #endif #ifdef CONFIG_PCI_ATS diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e682df536be5..77b0f2c45bc0 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -918,7 +918,10 @@ static bool find_source_device(struct pci_dev *parent, if (result) return true; - pci_walk_bus(parent->subordinate, find_device_iter, e_info); + if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(parent, find_device_iter, e_info); + else + pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { pci_info(parent, "can't find device of ID%04x\n", e_info->id); @@ -1056,6 +1059,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) if (!(info->status & ~info->mask)) return 0; } else if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_DOWNSTREAM || info->severity == AER_NONFATAL) { @@ -1233,9 +1237,12 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev, { set_device_error_reporting(dev, &enable); - if (!dev->subordinate) - return; - pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(dev, set_device_error_reporting, &enable); + else if (dev->subordinate) + pci_walk_bus(dev->subordinate, set_device_error_reporting, + &enable); + } /** diff --git a/drivers/pci/pcie/rcec.c b/drivers/pci/pcie/rcec.c index cdec277cbd62..2c5c552994e4 100644 --- a/drivers/pci/pcie/rcec.c +++ b/drivers/pci/pcie/rcec.c @@ -53,6 +53,18 @@ static int link_rcec_helper(struct pci_dev *dev, void *data) return 0; } +static int walk_rcec_helper(struct pci_dev *dev, void *data) +{ + struct walk_rcec_data *rcec_data = data; + struct pci_dev *rcec = rcec_data->rcec; + + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) && + rcec_assoc_rciep(rcec, dev)) + rcec_data->user_callback(dev, rcec_data->user_data); + + return 0; +} + static void walk_rcec(int (*cb)(struct pci_dev *dev, void *data), void *userdata) { @@ -109,6 +121,31 @@ void pcie_link_rcec(struct pci_dev *rcec) walk_rcec(link_rcec_helper, &rcec_data); } +/** + * pcie_walk_rcec - Walk RCiEP devices associating with RCEC and call callback. + * @rcec: RCEC whose RCiEP devices should be walked + * @cb: Callback to be called for each RCiEP device found + * @userdata: Arbitrary pointer to be passed to callback + * + * Walk the given RCEC. Call the callback on each RCiEP found. + * + * If @cb returns anything other than 0, break out. + */ +void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), + void *userdata) +{ + struct walk_rcec_data rcec_data; + + if (!rcec->rcec_ea) + return; + + rcec_data.rcec = rcec; + rcec_data.user_callback = cb; + rcec_data.user_data = userdata; + + walk_rcec(walk_rcec_helper, &rcec_data); +} + void pci_rcec_init(struct pci_dev *dev) { struct rcec_ea *rcec_ea; -- cgit v1.2.3 From 9a2f604f44979e0effa8cf067e5a8ecda729f23b Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:35 -0800 Subject: PCI/PME: Add pcie_walk_rcec() to RCEC PME handling Root Complex Event Collectors (RCEC) appear as peers of Root Ports and also have the PME capability. As with AER, there is a need to be able to walk the RCiEPs associated with their RCEC for purposes of acting upon them with callbacks. Add RCEC support through the use of pcie_walk_rcec() to the current PME service driver and attach the PME service driver to the RCEC device. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-15-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/pme.c | 16 ++++++++++++---- drivers/pci/pcie/portdrv_core.c | 9 +++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 6a32970bb731..3fc08488d65f 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -310,7 +310,10 @@ static int pcie_pme_can_wakeup(struct pci_dev *dev, void *ign) static void pcie_pme_mark_devices(struct pci_dev *port) { pcie_pme_can_wakeup(port, NULL); - if (port->subordinate) + + if (pci_pcie_type(port) == PCI_EXP_TYPE_RC_EC) + pcie_walk_rcec(port, pcie_pme_can_wakeup, NULL); + else if (port->subordinate) pci_walk_bus(port->subordinate, pcie_pme_can_wakeup, NULL); } @@ -320,10 +323,16 @@ static void pcie_pme_mark_devices(struct pci_dev *port) */ static int pcie_pme_probe(struct pcie_device *srv) { - struct pci_dev *port; + struct pci_dev *port = srv->port; struct pcie_pme_service_data *data; + int type = pci_pcie_type(port); int ret; + /* Limit to Root Ports or Root Complex Event Collectors */ + if (type != PCI_EXP_TYPE_RC_EC && + type != PCI_EXP_TYPE_ROOT_PORT) + return -ENODEV; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -333,7 +342,6 @@ static int pcie_pme_probe(struct pcie_device *srv) data->srv = srv; set_service_data(srv, data); - port = srv->port; pcie_pme_interrupt_enable(port, false); pcie_clear_root_pme_status(port); @@ -445,7 +453,7 @@ static void pcie_pme_remove(struct pcie_device *srv) static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", - .port_type = PCI_EXP_TYPE_ROOT_PORT, + .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_PME, .probe = pcie_pme_probe, diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 50a9522ab07d..e1fed6649c41 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -233,12 +233,9 @@ static int get_port_device_capability(struct pci_dev *dev) } #endif - /* - * Root ports are capable of generating PME too. Root Complex - * Event Collectors can also generate PMEs, but we don't handle - * those yet. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT && + /* Root Ports and Root Complex Event Collectors may generate PMEs */ + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && (pcie_ports_native || host->native_pme)) { services |= PCIE_PORT_SERVICE_PME; -- cgit v1.2.3 From d292dd0eb3ac6ce6ea66715bb9f6b8e2ae70747c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:36 -0800 Subject: PCI/AER: Add RCEC AER error injection support Root Complex Event Collectors (RCEC) appear as peers to Root Ports and may also have the AER capability. Add RCEC support to the AER error injection driver. Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-16-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pcie/aer_inject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c index c2cbf425afc5..767f8859b99b 100644 --- a/drivers/pci/pcie/aer_inject.c +++ b/drivers/pci/pcie/aer_inject.c @@ -333,8 +333,11 @@ static int aer_inject(struct aer_error_inj *einj) if (!dev) return -ENODEV; rpdev = pcie_find_root_port(dev); + /* If Root Port not found, try to find an RCEC */ + if (!rpdev) + rpdev = dev->rcec; if (!rpdev) { - pci_err(dev, "Root port not found\n"); + pci_err(dev, "Neither Root Port nor RCEC found\n"); ret = -ENODEV; goto out_put; } -- cgit v1.2.3