aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds2024-03-11 18:14:06 -0700
committerLinus Torvalds2024-03-11 18:14:06 -0700
commitb0402403e54ae9eb94ce1cbb53c7def776e97426 (patch)
tree64198f48106bd0909cedf604df42ad8c667ce388 /include/linux
parent1f75619a721d5149d9a947f2177d3cffc473fbb7 (diff)
parentaf65545a0f82d7336f62e34f69d3c644806f5f95 (diff)
Merge tag 'edac_updates_for_v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov: - Add a FRU (Field Replaceable Unit) memory poison manager which collects and manages previously encountered hw errors in order to save them to persistent storage across reboots. Previously recorded errors are "replayed" upon reboot in order to poison memory which has caused said errors in the past. The main use case is stacked, on-chip memory which cannot simply be replaced so poisoning faulty areas of it and thus making them inaccessible is the only strategy to prolong its lifetime. - Add an AMD address translation library glue which converts the reported addresses of hw errors into system physical addresses in order to be used by other subsystems like memory failure, for example. Add support for MI300 accelerators to that library. - igen6: Add support for Alder Lake-N SoC - i10nm: Add Grand Ridge support - The usual fixlets and cleanups * tag 'edac_updates_for_v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/versal: Convert to platform remove callback returning void RAS/AMD/FMPM: Fix off by one when unwinding on error RAS/AMD/FMPM: Add debugfs interface to print record entries RAS/AMD/FMPM: Save SPA values RAS: Export helper to get ras_debugfs_dir RAS/AMD/ATL: Fix bit overflow in denorm_addr_df4_np2() RAS: Introduce a FRU memory poison manager RAS/AMD/ATL: Add MI300 row retirement support Documentation: Move RAS section to admin-guide EDAC/versal: Make the bit position of injected errors configurable EDAC/i10nm: Add Intel Grand Ridge micro-server support EDAC/igen6: Add one more Intel Alder Lake-N SoC support RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support RAS/AMD/ATL: Fix array overflow in get_logical_coh_st_fabric_id_mi300() RAS/AMD/ATL: Add MI300 support Documentation: RAS: Add index and address translation section EDAC/amd64: Use new AMD Address Translation Library RAS: Introduce AMD Address Translation Library EDAC/synopsys: Convert to devm_platform_ioremap_resource()
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/ras.h18
1 files changed, 18 insertions, 0 deletions
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674..a64182bc72ad 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
void log_arm_hw_error(struct cper_sec_proc_arm *err);
+
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
@@ -35,4 +36,21 @@ static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
#endif
+struct atl_err {
+ u64 addr;
+ u64 ipid;
+ u32 cpu;
+};
+
+#if IS_ENABLED(CONFIG_AMD_ATL)
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
+void amd_atl_unregister_decoder(void);
+void amd_retire_dram_row(struct atl_err *err);
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+#else
+static inline void amd_retire_dram_row(struct atl_err *err) { }
+static inline unsigned long
+amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
+#endif /* CONFIG_AMD_ATL */
+
#endif /* __RAS_H__ */