From 7dbbc8f57d4ba3c369b692db9fd2c9653abf0bb5 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 26 Jan 2024 08:06:43 +0000 Subject: x86/mm: delete unused cpu argument to leave_mm() The argument is unused since commit 3d28ebceaffa ("x86/mm: Rework lazy TLB to track the actual loaded mm"), delete it. Link: https://lkml.kernel.org/r/20240126080644.1714297-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Cc: Andy Lutomirski Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/mmu.h | 2 +- arch/x86/kernel/alternative.c | 2 +- arch/x86/mm/tlb.c | 2 +- arch/x86/xen/mmu_pv.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 0da5c227f490..ce4677b8b735 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -75,7 +75,7 @@ typedef struct { .lock = __MUTEX_INITIALIZER(mm.context.lock), \ } -void leave_mm(int cpu); +void leave_mm(void); #define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1d85cb7071cb..21108d8e6f6b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1805,7 +1805,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) * restoring the previous mm. */ if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) - leave_mm(smp_processor_id()); + leave_mm(); temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); switch_mm_irqs_off(NULL, mm, current); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5768d386efab..80b0caa82a91 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -299,7 +299,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam, write_cr3(new_mm_cr3); } -void leave_mm(int cpu) +void leave_mm(void) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 72af496a160c..218773cfb009 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -913,7 +913,7 @@ static void drop_mm_ref_this_cpu(void *info) struct mm_struct *mm = info; if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) - leave_mm(smp_processor_id()); + leave_mm(); /* * If this cpu still has a stale cr3 reference, then make sure -- cgit v1.2.3 From 3cfd6625a6cf838137f3bf7a1635332cffe7ec63 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 26 Jan 2024 08:06:44 +0000 Subject: x86/mm: clarify "prev" usage in switch_mm_irqs_off() In the x86 implementation of switch_mm_irqs_off(), we do not use the "prev" argument passed in by the caller, we use exclusively use "real_prev", which is cpu_tlbstate.loaded_mm. This is not obvious at the first sight. Furthermore, a comment describes a condition that happens when called with prev == next, but this should not affect the function in any way since prev is unused. Apparently, the comment is intended to clarify why we don't rely on prev == next to decide whether we need to update CR3, but again, it is not obvious. The comment also references the fact that leave_mm() calls with prev == NULL and tsk == NULL, but this also shouldn't matter because prev is unused and tsk is only used in one function which has a NULL check. Clarify things by renaming (prev -> unused) and (real_prev -> prev), also move and rewrite the comment as an explanation for why we don't rely on "prev" supplied by the caller in x86 code and use our own. Hopefully this makes reading the code easier. Link: https://lkml.kernel.org/r/20240126080644.1714297-2-yosryahmed@google.com Signed-off-by: Yosry Ahmed Cc: Andy Lutomirski Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/mm/tlb.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 80b0caa82a91..bf9605caf24f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -492,10 +492,16 @@ void cr4_update_pce(void *ignored) static inline void cr4_update_pce_mm(struct mm_struct *mm) { } #endif -void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, +/* + * The "prev" argument passed by the caller does not always match CR3. For + * example, the scheduler passes in active_mm when switching from lazy TLB mode + * to normal mode, but switch_mm_irqs_off() can be called from x86 code without + * updating active_mm. Use cpu_tlbstate.loaded_mm instead. + */ +void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, struct task_struct *tsk) { - struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm); u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); unsigned long new_lam = mm_lam_cr3_mask(next); bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy); @@ -504,15 +510,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, bool need_flush; u16 new_asid; - /* - * NB: The scheduler will call us with prev == next when switching - * from lazy TLB mode to normal mode if active_mm isn't changing. - * When this happens, we don't assume that CR3 (and hence - * cpu_tlbstate.loaded_mm) matches next. - * - * NB: leave_mm() calls us with prev == NULL and tsk == NULL. - */ - /* We don't want flush_tlb_func() to run concurrently with us. */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) WARN_ON_ONCE(!irqs_disabled()); @@ -527,7 +524,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, + if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid, tlbstate_lam_cr3_mask()))) { /* * If we were to BUG here, we'd be very likely to kill @@ -559,7 +556,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * provides that full memory barrier and core serializing * instruction. */ - if (real_prev == next) { + if (prev == next) { /* Not actually switching mm's */ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); @@ -574,7 +571,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mm_cpumask. The TLB shootdown code can figure out from * cpu_tlbstate_shared.is_lazy whether or not to send an IPI. */ - if (WARN_ON_ONCE(real_prev != &init_mm && + if (WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))) cpumask_set_cpu(cpu, mm_cpumask(next)); @@ -616,10 +613,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * Skip kernel threads; we never send init_mm TLB flushing IPIs, * but the bitmap manipulation can cause cache line contention. */ - if (real_prev != &init_mm) { + if (prev != &init_mm) { VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, - mm_cpumask(real_prev))); - cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); + mm_cpumask(prev))); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); } /* @@ -656,9 +653,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); - if (next != real_prev) { + if (next != prev) { cr4_update_pce_mm(next); - switch_ldt(real_prev, next); + switch_ldt(prev, next); } } -- cgit v1.2.3 From a5e8131a0329673f70faee2e9ffb02e8a5bb3c89 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 30 Jan 2024 11:34:33 +0100 Subject: arm64, powerpc, riscv, s390, x86: ptdump: refactor CONFIG_DEBUG_WX All architectures using the core ptdump functionality also implement CONFIG_DEBUG_WX, and they all do it more or less the same way, with a function called debug_checkwx() that is called by mark_rodata_ro(), which is a substitute to ptdump_check_wx() when CONFIG_DEBUG_WX is set and a no-op otherwise. Refactor by centrally defining debug_checkwx() in linux/ptdump.h and call debug_checkwx() immediately after calling mark_rodata_ro() instead of calling it at the end of every mark_rodata_ro(). On x86_32, mark_rodata_ro() first checks __supported_pte_mask has _PAGE_NX before calling debug_checkwx(). Now the check is inside the callee ptdump_walk_pgd_level_checkwx(). On powerpc_64, mark_rodata_ro() bails out early before calling ptdump_check_wx() when the MMU doesn't have KERNEL_RO feature. The check is now also done in ptdump_check_wx() as it is called outside mark_rodata_ro(). Link: https://lkml.kernel.org/r/a59b102d7964261d31ead0316a9f18628e4e7a8e.1706610398.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Reviewed-by: Alexandre Ghiti Cc: Albert Ou Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V (IBM)" Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gerald Schaefer Cc: Greg KH Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kees Cook Cc: Mark Rutland Cc: Michael Ellerman Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Phong Tran Cc: Russell King Cc: Steven Price Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/include/asm/ptdump.h | 7 ------- arch/arm64/mm/mmu.c | 2 -- arch/powerpc/mm/mmu_decl.h | 6 ------ arch/powerpc/mm/pgtable_32.c | 4 ---- arch/powerpc/mm/pgtable_64.c | 3 --- arch/powerpc/mm/ptdump/ptdump.c | 3 +++ arch/riscv/include/asm/ptdump.h | 22 ---------------------- arch/riscv/mm/init.c | 3 --- arch/riscv/mm/ptdump.c | 1 - arch/s390/include/asm/ptdump.h | 14 -------------- arch/s390/mm/dump_pagetables.c | 1 - arch/s390/mm/init.c | 2 -- arch/x86/include/asm/pgtable.h | 3 +-- arch/x86/mm/dump_pagetables.c | 3 +++ arch/x86/mm/init_32.c | 2 -- arch/x86/mm/init_64.c | 2 -- include/linux/ptdump.h | 7 +++++++ init/main.c | 2 ++ 18 files changed, 16 insertions(+), 71 deletions(-) delete mode 100644 arch/riscv/include/asm/ptdump.h delete mode 100644 arch/s390/include/asm/ptdump.h (limited to 'arch/x86') diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 581caac525b0..5b1701c76d1c 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -29,13 +29,6 @@ void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } #endif -void ptdump_check_wx(void); #endif /* CONFIG_PTDUMP_CORE */ -#ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_check_wx() -#else -#define debug_checkwx() do { } while (0) -#endif - #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1ac7467d34c9..3a27d887f7dd 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -632,8 +632,6 @@ void mark_rodata_ro(void) section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); - - debug_checkwx(); } static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 72341b9fb552..90dcc2844056 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -171,12 +171,6 @@ static inline void mmu_mark_rodata_ro(void) { } void __init mmu_mapin_immr(void); #endif -#ifdef CONFIG_DEBUG_WX -void ptdump_check_wx(void); -#else -static inline void ptdump_check_wx(void) { } -#endif - static inline bool debug_pagealloc_enabled_or_kfence(void) { return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5c02fd08d61e..12498017da8e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -153,7 +153,6 @@ void mark_rodata_ro(void) if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); - ptdump_check_wx(); return; } @@ -166,9 +165,6 @@ void mark_rodata_ro(void) PFN_DOWN((unsigned long)_stext); set_memory_ro((unsigned long)_stext, numpages); - - // mark_initmem_nx() should have already run by now - ptdump_check_wx(); } #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 5ac1fd30341b..1b366526f4f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -150,9 +150,6 @@ void mark_rodata_ro(void) radix__mark_rodata_ro(); else hash__mark_rodata_ro(); - - // mark_initmem_nx() should have already run by now - ptdump_check_wx(); } void mark_initmem_nx(void) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2313053fe679..620d4917ebe8 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -343,6 +343,9 @@ void ptdump_check_wx(void) } }; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO)) + return; + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) diff --git a/arch/riscv/include/asm/ptdump.h b/arch/riscv/include/asm/ptdump.h deleted file mode 100644 index 3c9ea6dd5af7..000000000000 --- a/arch/riscv/include/asm/ptdump.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2019 SiFive - */ - -#ifndef _ASM_RISCV_PTDUMP_H -#define _ASM_RISCV_PTDUMP_H - -void ptdump_check_wx(void); - -#ifdef CONFIG_DEBUG_WX -static inline void debug_checkwx(void) -{ - ptdump_check_wx(); -} -#else -static inline void debug_checkwx(void) -{ -} -#endif - -#endif /* _ASM_RISCV_PTDUMP_H */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fa34cf55037b..a4f218cfb845 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -723,8 +722,6 @@ void mark_rodata_ro(void) if (IS_ENABLED(CONFIG_64BIT)) set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data), set_memory_ro); - - debug_checkwx(); } #else static __init pgprot_t pgprot_from_va(uintptr_t va) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 657c27bc07a7..075265603313 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h deleted file mode 100644 index f960b2896606..000000000000 --- a/arch/s390/include/asm/ptdump.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_S390_PTDUMP_H -#define _ASM_S390_PTDUMP_H - -void ptdump_check_wx(void); - -static inline void debug_checkwx(void) -{ - if (IS_ENABLED(CONFIG_DEBUG_WX)) - ptdump_check_wx(); -} - -#endif /* _ASM_S390_PTDUMP_H */ diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index d37a8f607b71..8dcb4e0c71bd 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8d9a60ccb777..f6391442c0c2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -109,7 +108,6 @@ void mark_rodata_ro(void) __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); - debug_checkwx(); } int set_memory_encrypted(unsigned long vaddr, int numpages) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9d077bca6a10..6c979028e521 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -32,6 +32,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); void ptdump_walk_pgd_level_checkwx(void); +#define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); /* @@ -41,10 +42,8 @@ void ptdump_walk_user_pgd_level_checkwx(void); #define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) #ifdef CONFIG_DEBUG_WX -#define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else -#define debug_checkwx() do { } while (0) #define debug_checkwx_user() do { } while (0) #endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e1b599ecbbc2..0008524eebe9 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -433,6 +433,9 @@ void ptdump_walk_user_pgd_level_checkwx(void) void ptdump_walk_pgd_level_checkwx(void) { + if (!(__supported_pte_mask & _PAGE_NX)) + return; + ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b63403d7179d..5c736b707cae 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -800,6 +800,4 @@ void mark_rodata_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif mark_nxdata_nx(); - if (__supported_pte_mask & _PAGE_NX) - debug_checkwx(); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a0dffaca6d2b..ebdbcae48011 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1412,8 +1412,6 @@ void mark_rodata_ro(void) (void *)text_end, (void *)rodata_start); free_kernel_image_pages("unused kernel image (rodata/data gap)", (void *)rodata_end, (void *)_sdata); - - debug_checkwx(); } /* diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 2a3a95586425..c10513739bf9 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -19,5 +19,12 @@ struct ptdump_state { }; void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd); +void ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} #endif /* _LINUX_PTDUMP_H */ diff --git a/init/main.c b/init/main.c index e24b0780fdff..749a9f8d2c9b 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include @@ -1408,6 +1409,7 @@ static void mark_readonly(void) */ rcu_barrier(); mark_rodata_ro(); + debug_checkwx(); rodata_test(); } else pr_info("Kernel memory protection disabled.\n"); -- cgit v1.2.3 From 6cdc82db0c044d36137dd98f33e8aa0b8742987f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 30 Jan 2024 11:34:35 +0100 Subject: mm: ptdump: have ptdump_check_wx() return bool Have ptdump_check_wx() return true when the check is successful or false otherwise. [akpm@linux-foundation.org: fix a couple of build issues (x86_64 allmodconfig)] Link: https://lkml.kernel.org/r/7943149fe955458cb7b57cd483bf41a3aad94684.1706610398.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V (IBM)" Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Dave Hansen Cc: Gerald Schaefer Cc: Greg KH Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kees Cook Cc: Mark Rutland Cc: Michael Ellerman Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Phong Tran Cc: Russell King Cc: Steven Price Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/mm/ptdump.c | 11 ++++++++--- arch/powerpc/mm/ptdump/ptdump.c | 13 +++++++++---- arch/riscv/mm/ptdump.c | 11 ++++++++--- arch/s390/mm/dump_pagetables.c | 13 +++++++++---- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/dump_pagetables.c | 23 ++++++++++++++--------- include/linux/ptdump.h | 5 ++++- 7 files changed, 53 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index e305b6593c4e..696822f75582 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -345,7 +345,7 @@ static struct ptdump_info kernel_ptdump_info = { .base_addr = PAGE_OFFSET, }; -void ptdump_check_wx(void) +bool ptdump_check_wx(void) { struct pg_state st = { .seq = NULL, @@ -366,11 +366,16 @@ void ptdump_check_wx(void) ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - if (st.wx_pages || st.uxn_pages) + if (st.wx_pages || st.uxn_pages) { pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", st.wx_pages, st.uxn_pages); - else + + return false; + } else { pr_info("Checked W+X mappings: passed, no W+X pages found\n"); + + return true; + } } static int __init ptdump_init(void) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index b835c80371cd..9dc239967b77 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -327,7 +327,7 @@ static void __init build_pgtable_complete_mask(void) pg_level[i].mask |= pg_level[i].flag[j].mask; } -void ptdump_check_wx(void) +bool ptdump_check_wx(void) { struct pg_state st = { .seq = NULL, @@ -344,15 +344,20 @@ void ptdump_check_wx(void) }; if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO)) - return; + return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - if (st.wx_pages) + if (st.wx_pages) { pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); - else + + return false; + } else { pr_info("Checked W+X mappings: passed, no W+X pages found\n"); + + return true; + } } static int __init ptdump_init(void) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 075265603313..1289cc6d3700 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -335,7 +335,7 @@ static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo) ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL); } -void ptdump_check_wx(void) +bool ptdump_check_wx(void) { struct pg_state st = { .seq = NULL, @@ -356,11 +356,16 @@ void ptdump_check_wx(void) ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - if (st.wx_pages) + if (st.wx_pages) { pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n", st.wx_pages); - else + + return false; + } else { pr_info("Checked W+X mappings: passed, no W+X pages found\n"); + + return true; + } } static int ptdump_show(struct seq_file *m, void *v) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 99da5a5602a8..ffd07ed7b4af 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -192,7 +192,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } -void ptdump_check_wx(void) +bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { @@ -215,14 +215,19 @@ void ptdump_check_wx(void) }; if (!MACHINE_HAS_NX) - return; + return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - if (st.wx_pages) + if (st.wx_pages) { pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); - else + + return false; + } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? "unexpected " : ""); + + return true; + } } #ifdef CONFIG_PTDUMP_DEBUGFS diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6c979028e521..b50b2ef63672 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -31,7 +31,7 @@ struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); -void ptdump_walk_pgd_level_checkwx(void); +bool ptdump_walk_pgd_level_checkwx(void); #define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0008524eebe9..35b2cfd47914 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -362,9 +362,9 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } -static void ptdump_walk_pgd_level_core(struct seq_file *m, - struct mm_struct *mm, pgd_t *pgd, - bool checkwx, bool dmesg) +bool ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, + bool checkwx, bool dmesg) { const struct ptdump_range ptdump_ranges[] = { #ifdef CONFIG_X86_64 @@ -391,12 +391,17 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, ptdump_walk_pgd(&st.ptdump, mm, pgd); if (!checkwx) - return; - if (st.wx_pages) + return true; + if (st.wx_pages) { pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", st.wx_pages); - else + + return false; + } else { pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); + + return true; + } } void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm) @@ -431,12 +436,12 @@ void ptdump_walk_user_pgd_level_checkwx(void) #endif } -void ptdump_walk_pgd_level_checkwx(void) +bool ptdump_walk_pgd_level_checkwx(void) { if (!(__supported_pte_mask & _PAGE_NX)) - return; + return true; - ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); + return ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); } static int __init pt_dump_init(void) diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index c10513739bf9..8dbd51ea8626 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -18,8 +18,11 @@ struct ptdump_state { const struct ptdump_range *range; }; +bool ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, + bool checkwx, bool dmesg); void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd); -void ptdump_check_wx(void); +bool ptdump_check_wx(void); static inline void debug_checkwx(void) { -- cgit v1.2.3 From 506b586769ecef8c83fff64de227e7fa84b7be42 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 15 Feb 2024 10:31:52 +0000 Subject: x86/mm: convert pte_next_pfn() to pte_advance_pfn() Core-mm needs to be able to advance the pfn by an arbitrary amount, so override the new pte_advance_pfn() API to do so. Link: https://lkml.kernel.org/r/20240215103205.2607016-6-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Andrey Ryabinin Cc: Ard Biesheuvel Cc: Barry Song <21cnbao@gmail.com> Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Morse Cc: John Hubbard Cc: Kefeng Wang Cc: Marc Zyngier Cc: Mark Rutland Cc: Matthew Wilcox (Oracle) Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b50b2ef63672..69ed0ea0641b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -955,13 +955,13 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } -static inline pte_t pte_next_pfn(pte_t pte) +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { if (__pte_needs_invert(pte_val(pte))) - return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT)); - return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } -#define pte_next_pfn pte_next_pfn +#define pte_advance_pfn pte_advance_pfn static inline int pte_present(pte_t a) { -- cgit v1.2.3 From 85fcde402db191b5f222ebfecda653777d7d084e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Jan 2024 13:12:41 +0800 Subject: kexec: split crashkernel reservation code out from crash_core.c Patch series "Split crash out from kexec and clean up related config items", v3. Motivation: ============= Previously, LKP reported a building error. When investigating, it can't be resolved reasonablly with the present messy kdump config items. https://lore.kernel.org/oe-kbuild-all/202312182200.Ka7MzifQ-lkp@intel.com/ The kdump (crash dumping) related config items could causes confusions: Firstly, CRASH_CORE enables codes including - crashkernel reservation; - elfcorehdr updating; - vmcoreinfo exporting; - crash hotplug handling; Now fadump of powerpc, kcore dynamic debugging and kdump all selects CRASH_CORE, while fadump - fadump needs crashkernel parsing, vmcoreinfo exporting, and accessing global variable 'elfcorehdr_addr'; - kcore only needs vmcoreinfo exporting; - kdump needs all of the current kernel/crash_core.c. So only enabling PROC_CORE or FA_DUMP will enable CRASH_CORE, this mislead people that we enable crash dumping, actual it's not. Secondly, It's not reasonable to allow KEXEC_CORE select CRASH_CORE. Because KEXEC_CORE enables codes which allocate control pages, copy kexec/kdump segments, and prepare for switching. These codes are shared by both kexec reboot and kdump. We could want kexec reboot, but disable kdump. In that case, CRASH_CORE should not be selected. -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- Thirdly, It's not reasonable to allow CRASH_DUMP select KEXEC_CORE. That could make KEXEC_CORE, CRASH_DUMP are enabled independently from KEXEC or KEXEC_FILE. However, w/o KEXEC or KEXEC_FILE, the KEXEC_CORE code built in doesn't make any sense because no kernel loading or switching will happen to utilize the KEXEC_CORE code. --------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y --------------------- In this case, what is worse, on arch sh and arm, KEXEC relies on MMU, while CRASH_DUMP can still be enabled when !MMU, then compiling error is seen as the lkp test robot reported in above link. ------arch/sh/Kconfig------ config ARCH_SUPPORTS_KEXEC def_bool MMU config ARCH_SUPPORTS_CRASH_DUMP def_bool BROKEN_ON_SMP --------------------------- Changes: =========== 1, split out crash_reserve.c from crash_core.c; 2, split out vmcore_infoc. from crash_core.c; 3, move crash related codes in kexec_core.c into crash_core.c; 4, remove dependency of FA_DUMP on CRASH_DUMP; 5, clean up kdump related config items; 6, wrap up crash codes in crash related ifdefs on all 8 arch-es which support crash dumping, except of ppc; Achievement: =========== With above changes, I can rearrange the config item logic as below (the right item depends on or is selected by the left item): PROC_KCORE -----------> VMCORE_INFO |----------> VMCORE_INFO FA_DUMP----| |----------> CRASH_RESERVE ---->VMCORE_INFO / |---->CRASH_RESERVE KEXEC --| /| |--> KEXEC_CORE--> CRASH_DUMP-->/-|---->PROC_VMCORE KEXEC_FILE --| \ | \---->CRASH_HOTPLUG KEXEC --| |--> KEXEC_CORE (for kexec reboot only) KEXEC_FILE --| Test ======== On all 8 architectures, including x86_64, arm64, s390x, sh, arm, mips, riscv, loongarch, I did below three cases of config item setting and building all passed. Take configs on x86_64 as exampmle here: (1) Both CONFIG_KEXEC and KEXEC_FILE is unset, then all kexec/kdump items are unset automatically: # Kexec and crash features # CONFIG_KEXEC is not set # CONFIG_KEXEC_FILE is not set # end of Kexec and crash features (2) set CONFIG_KEXEC_FILE and 'make olddefconfig': --------------- # Kexec and crash features CONFIG_CRASH_RESERVE=y CONFIG_VMCORE_INFO=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_CRASH_HOTPLUG=y CONFIG_CRASH_MAX_MEMORY_RANGES=8192 # end of Kexec and crash features --------------- (3) unset CONFIG_CRASH_DUMP in case 2 and execute 'make olddefconfig': ------------------------ # Kexec and crash features CONFIG_KEXEC_CORE=y CONFIG_KEXEC_FILE=y # end of Kexec and crash features ------------------------ Note: For ppc, it needs investigation to make clear how to split out crash code in arch folder. Hope Hari and Pingfan can help have a look, see if it's doable. Now, I make it either have both kexec and crash enabled, or disable both of them altogether. This patch (of 14): Both kdump and fa_dump of ppc rely on crashkernel reservation. Move the relevant codes into separate files: crash_reserve.c, include/linux/crash_reserve.h. And also add config item CRASH_RESERVE to control its enabling of the codes. And update config items which has relationship with crashkernel reservation. And also change ifdeffery from CONFIG_CRASH_CORE to CONFIG_CRASH_RESERVE when those scopes are only crashkernel reservation related. And also rename arch/XXX/include/asm/{crash_core.h => crash_reserve.h} on arm64, x86 and risc-v because those architectures' crash_core.h is only related to crashkernel reservation. [akpm@linux-foundation.org: s/CRASH_RESEERVE/CRASH_RESERVE/, per Klara Modin] Link: https://lkml.kernel.org/r/20240124051254.67105-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20240124051254.67105-2-bhe@redhat.com Signed-off-by: Baoquan He Acked-by: Hari Bathini Cc: Al Viro Cc: Eric W. Biederman Cc: Pingfan Liu Cc: Klara Modin Cc: Michael Kelley Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Yang Li Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/crash_core.h | 10 - arch/arm64/include/asm/crash_reserve.h | 10 + arch/powerpc/Kconfig | 1 + arch/powerpc/mm/nohash/kaslr_booke.c | 4 +- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/crash_core.h | 11 - arch/riscv/include/asm/crash_reserve.h | 11 + arch/x86/Kconfig | 2 +- arch/x86/include/asm/crash_core.h | 42 --- arch/x86/include/asm/crash_reserve.h | 42 +++ include/linux/crash_core.h | 40 --- include/linux/crash_reserve.h | 48 ++++ include/linux/kexec.h | 1 + kernel/Kconfig.kexec | 5 +- kernel/Makefile | 1 + kernel/crash_core.c | 438 ------------------------------- kernel/crash_reserve.c | 464 +++++++++++++++++++++++++++++++++ 18 files changed, 587 insertions(+), 547 deletions(-) delete mode 100644 arch/arm64/include/asm/crash_core.h create mode 100644 arch/arm64/include/asm/crash_reserve.h delete mode 100644 arch/riscv/include/asm/crash_core.h create mode 100644 arch/riscv/include/asm/crash_reserve.h delete mode 100644 arch/x86/include/asm/crash_core.h create mode 100644 arch/x86/include/asm/crash_reserve.h create mode 100644 include/linux/crash_reserve.h create mode 100644 kernel/crash_reserve.c (limited to 'arch/x86') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 386566138620..5a7ac1f37bdc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1519,7 +1519,7 @@ config ARCH_SUPPORTS_CRASH_DUMP def_bool y config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - def_bool CRASH_CORE + def_bool CRASH_RESERVE config TRANS_TABLE def_bool y diff --git a/arch/arm64/include/asm/crash_core.h b/arch/arm64/include/asm/crash_core.h deleted file mode 100644 index 9f5c8d339f44..000000000000 --- a/arch/arm64/include/asm/crash_core.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ARM64_CRASH_CORE_H -#define _ARM64_CRASH_CORE_H - -/* Current arm64 boot protocol requires 2MB alignment */ -#define CRASH_ALIGN SZ_2M - -#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit -#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) -#endif diff --git a/arch/arm64/include/asm/crash_reserve.h b/arch/arm64/include/asm/crash_reserve.h new file mode 100644 index 000000000000..4afe027a4e7b --- /dev/null +++ b/arch/arm64/include/asm/crash_reserve.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ARM64_CRASH_RESERVE_H +#define _ARM64_CRASH_RESERVE_H + +/* Current arm64 boot protocol requires 2MB alignment */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b9fc064d38d2..7f704ae5c5ef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -691,6 +691,7 @@ config FA_DUMP bool "Firmware-assisted dump" depends on PPC64 && (PPC_RTAS || PPC_POWERNV) select CRASH_CORE + select CRASH_RESERVE select CRASH_DUMP help A robust mechanism to get reliable kernel crash dump with diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index b4f2786a7d2b..cdff129abb14 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -173,7 +173,7 @@ static __init bool overlaps_region(const void *fdt, u32 start, static void __init get_crash_kernel(void *fdt, unsigned long size) { -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_CRASH_RESERVE unsigned long long crash_size, crash_base; int ret; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..bd06ad1bb97c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -767,7 +767,7 @@ config ARCH_SUPPORTS_CRASH_DUMP def_bool y config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - def_bool CRASH_CORE + def_bool CRASH_RESERVE config COMPAT bool "Kernel support for 32-bit U-mode" diff --git a/arch/riscv/include/asm/crash_core.h b/arch/riscv/include/asm/crash_core.h deleted file mode 100644 index e1874b23feaf..000000000000 --- a/arch/riscv/include/asm/crash_core.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _RISCV_CRASH_CORE_H -#define _RISCV_CRASH_CORE_H - -#define CRASH_ALIGN PMD_SIZE - -#define CRASH_ADDR_LOW_MAX dma32_phys_limit -#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() - -extern phys_addr_t memblock_end_of_DRAM(void); -#endif diff --git a/arch/riscv/include/asm/crash_reserve.h b/arch/riscv/include/asm/crash_reserve.h new file mode 100644 index 000000000000..013962e63587 --- /dev/null +++ b/arch/riscv/include/asm/crash_reserve.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _RISCV_CRASH_RESERVE_H +#define _RISCV_CRASH_RESERVE_H + +#define CRASH_ALIGN PMD_SIZE + +#define CRASH_ADDR_LOW_MAX dma32_phys_limit +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() + +extern phys_addr_t memblock_end_of_DRAM(void); +#endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5edec175b9bf..5bd925815154 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2106,7 +2106,7 @@ config ARCH_SUPPORTS_CRASH_HOTPLUG def_bool y config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - def_bool CRASH_CORE + def_bool CRASH_RESERVE config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) diff --git a/arch/x86/include/asm/crash_core.h b/arch/x86/include/asm/crash_core.h deleted file mode 100644 index 76af98f4e801..000000000000 --- a/arch/x86/include/asm/crash_core.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _X86_CRASH_CORE_H -#define _X86_CRASH_CORE_H - -/* 16M alignment for crash kernel regions */ -#define CRASH_ALIGN SZ_16M - -/* - * Keep the crash kernel below this limit. - * - * Earlier 32-bits kernels would limit the kernel to the low 512 MB range - * due to mapping restrictions. - * - * 64-bit kdump kernels need to be restricted to be under 64 TB, which is - * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jump could be from 5-level paging to 4-level paging, the jump will fail if - * the kernel is put above 64 TB, and during the 1st kernel bootup there's - * no good way to detect the paging mode of the target kernel which will be - * loaded for dumping. - */ -extern unsigned long swiotlb_size_or_default(void); - -#ifdef CONFIG_X86_32 -# define CRASH_ADDR_LOW_MAX SZ_512M -# define CRASH_ADDR_HIGH_MAX SZ_512M -#else -# define CRASH_ADDR_LOW_MAX SZ_4G -# define CRASH_ADDR_HIGH_MAX SZ_64T -#endif - -# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default() - -static inline unsigned long crash_low_size_default(void) -{ -#ifdef CONFIG_X86_64 - return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); -#else - return 0; -#endif -} - -#endif /* _X86_CRASH_CORE_H */ diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h new file mode 100644 index 000000000000..152239f95541 --- /dev/null +++ b/arch/x86/include/asm/crash_reserve.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_CRASH_RESERVE_H +#define _X86_CRASH_RESERVE_H + +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_16M + +/* + * Keep the crash kernel below this limit. + * + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range + * due to mapping restrictions. + * + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is + * the upper limit of system RAM in 4-level paging mode. Since the kdump + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. + */ +extern unsigned long swiotlb_size_or_default(void); + +#ifdef CONFIG_X86_32 +# define CRASH_ADDR_LOW_MAX SZ_512M +# define CRASH_ADDR_HIGH_MAX SZ_512M +#else +# define CRASH_ADDR_LOW_MAX SZ_4G +# define CRASH_ADDR_HIGH_MAX SZ_64T +#endif + +# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default() + +static inline unsigned long crash_low_size_default(void) +{ +#ifdef CONFIG_X86_64 + return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); +#else + return 0; +#endif +} + +#endif /* _X86_CRASH_RESERVE_H */ diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 9eaeaafe0cad..1fde49246fa6 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,14 +5,6 @@ #include #include #include -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION -#include -#endif - -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; #define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) @@ -87,38 +79,6 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); - -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION -#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE -#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) -#endif -#ifndef CRASH_ALIGN -#define CRASH_ALIGN SZ_2M -#endif -#ifndef CRASH_ADDR_LOW_MAX -#define CRASH_ADDR_LOW_MAX SZ_4G -#endif -#ifndef CRASH_ADDR_HIGH_MAX -#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() -#endif - -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high); -#else -static inline void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high) -{} -#endif - /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h new file mode 100644 index 000000000000..5a9df944fb80 --- /dev/null +++ b/include/linux/crash_reserve.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_CRASH_RESERVE_H +#define LINUX_CRASH_RESERVE_H + +#include +#include +#include +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif +#endif /* LINUX_CRASH_RESERVE_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 400cb6c02176..6d79bfb52e5b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -16,6 +16,7 @@ #if !defined(__ASSEMBLY__) #include +#include #include #include diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 946dffa048b7..8b7be71edd85 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -2,11 +2,15 @@ menu "Kexec and crash features" +config CRASH_RESERVE + bool + config CRASH_CORE bool config KEXEC_CORE select CRASH_CORE + select CRASH_RESERVE bool config KEXEC_ELF @@ -96,7 +100,6 @@ config KEXEC_JUMP config CRASH_DUMP bool "kernel crash dumps" depends on ARCH_SUPPORTS_CRASH_DUMP - select CRASH_CORE select KEXEC_CORE help Generate crash dump after being started by kexec. diff --git a/kernel/Makefile b/kernel/Makefile index ce105a5558fc..05fa88b3ab74 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_KALLSYMS_SELFTEST) += kallsyms_selftest.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o obj-$(CONFIG_KEXEC_CORE) += kexec_core.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 49b31e59d3cc..ae0d1ce89b46 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -34,444 +34,6 @@ u32 *vmcoreinfo_note; /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ static unsigned char *vmcoreinfo_data_safecopy; -/* Location of the reserved area for the crash kernel */ -struct resource crashk_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; -struct resource crashk_low_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; - -/* - * parsing the "crashkernel" commandline - * - * this code is intended to be called from architecture specific code - */ - - -/* - * This function parses command lines in the format - * - * crashkernel=ramsize-range:size[,...][@offset] - * - * The function returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_mem(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - char *cur = cmdline, *tmp; - unsigned long long total_mem = system_ram; - - /* - * Firmware sometimes reserves some memory regions for its own use, - * so the system memory size is less than the actual physical memory - * size. Work around this by rounding up the total size to 128M, - * which is enough for most test cases. - */ - total_mem = roundup(total_mem, SZ_128M); - - /* for each entry of the comma-separated list */ - do { - unsigned long long start, end = ULLONG_MAX, size; - - /* get the start of the range */ - start = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("crashkernel: Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (*cur != '-') { - pr_warn("crashkernel: '-' expected\n"); - return -EINVAL; - } - cur++; - - /* if no ':' is here, than we read the end */ - if (*cur != ':') { - end = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("crashkernel: Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (end <= start) { - pr_warn("crashkernel: end <= start\n"); - return -EINVAL; - } - } - - if (*cur != ':') { - pr_warn("crashkernel: ':' expected\n"); - return -EINVAL; - } - cur++; - - size = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (size >= total_mem) { - pr_warn("crashkernel: invalid size\n"); - return -EINVAL; - } - - /* match ? */ - if (total_mem >= start && total_mem < end) { - *crash_size = size; - break; - } - } while (*cur++ == ','); - - if (*crash_size > 0) { - while (*cur && *cur != ' ' && *cur != '@') - cur++; - if (*cur == '@') { - cur++; - *crash_base = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("Memory value expected after '@'\n"); - return -EINVAL; - } - } - } else - pr_info("crashkernel size resulted in zero bytes\n"); - - return 0; -} - -/* - * That function parses "simple" (old) crashkernel command lines like - * - * crashkernel=size[@offset] - * - * It returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_simple(char *cmdline, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - char *cur = cmdline; - - *crash_size = memparse(cmdline, &cur); - if (cmdline == cur) { - pr_warn("crashkernel: memory value expected\n"); - return -EINVAL; - } - - if (*cur == '@') - *crash_base = memparse(cur+1, &cur); - else if (*cur != ' ' && *cur != '\0') { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - - return 0; -} - -#define SUFFIX_HIGH 0 -#define SUFFIX_LOW 1 -#define SUFFIX_NULL 2 -static __initdata char *suffix_tbl[] = { - [SUFFIX_HIGH] = ",high", - [SUFFIX_LOW] = ",low", - [SUFFIX_NULL] = NULL, -}; - -/* - * That function parses "suffix" crashkernel command lines like - * - * crashkernel=size,[high|low] - * - * It returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_suffix(char *cmdline, - unsigned long long *crash_size, - const char *suffix) -{ - char *cur = cmdline; - - *crash_size = memparse(cmdline, &cur); - if (cmdline == cur) { - pr_warn("crashkernel: memory value expected\n"); - return -EINVAL; - } - - /* check with suffix */ - if (strncmp(cur, suffix, strlen(suffix))) { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - cur += strlen(suffix); - if (*cur != ' ' && *cur != '\0') { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - - return 0; -} - -static __init char *get_last_crashkernel(char *cmdline, - const char *name, - const char *suffix) -{ - char *p = cmdline, *ck_cmdline = NULL; - - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - char *end_p = strchr(p, ' '); - char *q; - - if (!end_p) - end_p = p + strlen(p); - - if (!suffix) { - int i; - - /* skip the one with any known suffix */ - for (i = 0; suffix_tbl[i]; i++) { - q = end_p - strlen(suffix_tbl[i]); - if (!strncmp(q, suffix_tbl[i], - strlen(suffix_tbl[i]))) - goto next; - } - ck_cmdline = p; - } else { - q = end_p - strlen(suffix); - if (!strncmp(q, suffix, strlen(suffix))) - ck_cmdline = p; - } -next: - p = strstr(p+1, name); - } - - return ck_cmdline; -} - -static int __init __parse_crashkernel(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base, - const char *suffix) -{ - char *first_colon, *first_space; - char *ck_cmdline; - char *name = "crashkernel="; - - BUG_ON(!crash_size || !crash_base); - *crash_size = 0; - *crash_base = 0; - - ck_cmdline = get_last_crashkernel(cmdline, name, suffix); - if (!ck_cmdline) - return -ENOENT; - - ck_cmdline += strlen(name); - - if (suffix) - return parse_crashkernel_suffix(ck_cmdline, crash_size, - suffix); - /* - * if the commandline contains a ':', then that's the extended - * syntax -- if not, it must be the classic syntax - */ - first_colon = strchr(ck_cmdline, ':'); - first_space = strchr(ck_cmdline, ' '); - if (first_colon && (!first_space || first_colon < first_space)) - return parse_crashkernel_mem(ck_cmdline, system_ram, - crash_size, crash_base); - - return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); -} - -/* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. - * - * If crashkernel=,high|low is supported on architecture, non-NULL values - * should be passed to parameters 'low_size' and 'high'. - */ -int __init parse_crashkernel(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base, - unsigned long long *low_size, - bool *high) -{ - int ret; - - /* crashkernel=X[@offset] */ - ret = __parse_crashkernel(cmdline, system_ram, crash_size, - crash_base, NULL); -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - /* - * If non-NULL 'high' passed in and no normal crashkernel - * setting detected, try parsing crashkernel=,high|low. - */ - if (high && ret == -ENOENT) { - ret = __parse_crashkernel(cmdline, 0, crash_size, - crash_base, suffix_tbl[SUFFIX_HIGH]); - if (ret || !*crash_size) - return -EINVAL; - - /* - * crashkernel=Y,low can be specified or not, but invalid value - * is not allowed. - */ - ret = __parse_crashkernel(cmdline, 0, low_size, - crash_base, suffix_tbl[SUFFIX_LOW]); - if (ret == -ENOENT) { - *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - ret = 0; - } else if (ret) { - return ret; - } - - *high = true; - } -#endif - if (!*crash_size) - ret = -EINVAL; - - return ret; -} - -/* - * Add a dummy early_param handler to mark crashkernel= as a known command line - * parameter and suppress incorrect warnings in init/main.c. - */ -static int __init parse_crashkernel_dummy(char *arg) -{ - return 0; -} -early_param("crashkernel", parse_crashkernel_dummy); - -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION -static int __init reserve_crashkernel_low(unsigned long long low_size) -{ -#ifdef CONFIG_64BIT - unsigned long long low_base; - - low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); - if (!low_base) { - pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); - return -ENOMEM; - } - - pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", - low_base, low_base + low_size, low_size >> 20); - - crashk_low_res.start = low_base; - crashk_low_res.end = low_base + low_size - 1; -#endif - return 0; -} - -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high) -{ - unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; - bool fixed_base = false; - - /* User specifies base address explicitly. */ - if (crash_base) { - fixed_base = true; - search_base = crash_base; - search_end = crash_base + crash_size; - } else if (high) { - search_base = CRASH_ADDR_LOW_MAX; - search_end = CRASH_ADDR_HIGH_MAX; - } - -retry: - crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, - search_base, search_end); - if (!crash_base) { - /* - * For crashkernel=size[KMG]@offset[KMG], print out failure - * message if can't reserve the specified region. - */ - if (fixed_base) { - pr_warn("crashkernel reservation failed - memory is in use.\n"); - return; - } - - /* - * For crashkernel=size[KMG], if the first attempt was for - * low memory, fall back to high memory, the minimum required - * low memory will be reserved later. - */ - if (!high && search_end == CRASH_ADDR_LOW_MAX) { - search_end = CRASH_ADDR_HIGH_MAX; - search_base = CRASH_ADDR_LOW_MAX; - crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; - goto retry; - } - - /* - * For crashkernel=size[KMG],high, if the first attempt was - * for high memory, fall back to low memory. - */ - if (high && search_end == CRASH_ADDR_HIGH_MAX) { - search_end = CRASH_ADDR_LOW_MAX; - search_base = 0; - goto retry; - } - pr_warn("cannot allocate crashkernel (size:0x%llx)\n", - crash_size); - return; - } - - if ((crash_base >= CRASH_ADDR_LOW_MAX) && - crash_low_size && reserve_crashkernel_low(crash_low_size)) { - memblock_phys_free(crash_base, crash_size); - return; - } - - pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", - crash_base, crash_base + crash_size, crash_size >> 20); - - /* - * The crashkernel memory will be removed from the kernel linear - * map. Inform kmemleak so that it won't try to access it. - */ - kmemleak_ignore_phys(crash_base); - if (crashk_low_res.end) - kmemleak_ignore_phys(crashk_low_res.start); - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; -} - -static __init int insert_crashkernel_resources(void) -{ - if (crashk_res.start < crashk_res.end) - insert_resource(&iomem_resource, &crashk_res); - - if (crashk_low_res.start < crashk_low_res.end) - insert_resource(&iomem_resource, &crashk_low_res); - - return 0; -} -early_initcall(insert_crashkernel_resources); -#endif - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c new file mode 100644 index 000000000000..bbb6c3cb00e4 --- /dev/null +++ b/kernel/crash_reserve.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * crash.c - kernel crash support code. + * Copyright (C) 2002-2004 Eric Biederman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "kallsyms_internal.h" +#include "kexec_internal.h" + +/* Location of the reserved area for the crash kernel */ +struct resource crashk_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; +struct resource crashk_low_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; + +/* + * parsing the "crashkernel" commandline + * + * this code is intended to be called from architecture specific code + */ + + +/* + * This function parses command lines in the format + * + * crashkernel=ramsize-range:size[,...][@offset] + * + * The function returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_mem(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + char *cur = cmdline, *tmp; + unsigned long long total_mem = system_ram; + + /* + * Firmware sometimes reserves some memory regions for its own use, + * so the system memory size is less than the actual physical memory + * size. Work around this by rounding up the total size to 128M, + * which is enough for most test cases. + */ + total_mem = roundup(total_mem, SZ_128M); + + /* for each entry of the comma-separated list */ + do { + unsigned long long start, end = ULLONG_MAX, size; + + /* get the start of the range */ + start = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("crashkernel: Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (*cur != '-') { + pr_warn("crashkernel: '-' expected\n"); + return -EINVAL; + } + cur++; + + /* if no ':' is here, than we read the end */ + if (*cur != ':') { + end = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("crashkernel: Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (end <= start) { + pr_warn("crashkernel: end <= start\n"); + return -EINVAL; + } + } + + if (*cur != ':') { + pr_warn("crashkernel: ':' expected\n"); + return -EINVAL; + } + cur++; + + size = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (size >= total_mem) { + pr_warn("crashkernel: invalid size\n"); + return -EINVAL; + } + + /* match ? */ + if (total_mem >= start && total_mem < end) { + *crash_size = size; + break; + } + } while (*cur++ == ','); + + if (*crash_size > 0) { + while (*cur && *cur != ' ' && *cur != '@') + cur++; + if (*cur == '@') { + cur++; + *crash_base = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("Memory value expected after '@'\n"); + return -EINVAL; + } + } + } else + pr_info("crashkernel size resulted in zero bytes\n"); + + return 0; +} + +/* + * That function parses "simple" (old) crashkernel command lines like + * + * crashkernel=size[@offset] + * + * It returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_simple(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + if (*cur == '@') + *crash_base = memparse(cur+1, &cur); + else if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + + return 0; +} + +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + +/* + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + return ck_cmdline; +} + +static int __init __parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *first_colon, *first_space; + char *ck_cmdline; + char *name = "crashkernel="; + + BUG_ON(!crash_size || !crash_base); + *crash_size = 0; + *crash_base = 0; + + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); + if (!ck_cmdline) + return -ENOENT; + + ck_cmdline += strlen(name); + + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + suffix); + /* + * if the commandline contains a ':', then that's the extended + * syntax -- if not, it must be the classic syntax + */ + first_colon = strchr(ck_cmdline, ':'); + first_space = strchr(ck_cmdline, ' '); + if (first_colon && (!first_space || first_colon < first_space)) + return parse_crashkernel_mem(ck_cmdline, system_ram, + crash_size, crash_base); + + return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); +} + +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + * + * If crashkernel=,high|low is supported on architecture, non-NULL values + * should be passed to parameters 'low_size' and 'high'. + */ +int __init parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base, + unsigned long long *low_size, + bool *high) +{ + int ret; + + /* crashkernel=X[@offset] */ + ret = __parse_crashkernel(cmdline, system_ram, crash_size, + crash_base, NULL); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + /* + * If non-NULL 'high' passed in and no normal crashkernel + * setting detected, try parsing crashkernel=,high|low. + */ + if (high && ret == -ENOENT) { + ret = __parse_crashkernel(cmdline, 0, crash_size, + crash_base, suffix_tbl[SUFFIX_HIGH]); + if (ret || !*crash_size) + return -EINVAL; + + /* + * crashkernel=Y,low can be specified or not, but invalid value + * is not allowed. + */ + ret = __parse_crashkernel(cmdline, 0, low_size, + crash_base, suffix_tbl[SUFFIX_LOW]); + if (ret == -ENOENT) { + *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + ret = 0; + } else if (ret) { + return ret; + } + + *high = true; + } +#endif + if (!*crash_size) + ret = -EINVAL; + + return ret; +} + +/* + * Add a dummy early_param handler to mark crashkernel= as a known command line + * parameter and suppress incorrect warnings in init/main.c. + */ +static int __init parse_crashkernel_dummy(char *arg) +{ + return 0; +} +early_param("crashkernel", parse_crashkernel_dummy); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +static int __init reserve_crashkernel_low(unsigned long long low_size) +{ +#ifdef CONFIG_64BIT + unsigned long long low_base; + + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + if (!low_base) { + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); + return -ENOMEM; + } + + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", + low_base, low_base + low_size, low_size >> 20); + + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); +#endif + return 0; +} + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{ + unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; + bool fixed_base = false; + + /* User specifies base address explicitly. */ + if (crash_base) { + fixed_base = true; + search_base = crash_base; + search_end = crash_base + crash_size; + } else if (high) { + search_base = CRASH_ADDR_LOW_MAX; + search_end = CRASH_ADDR_HIGH_MAX; + } + +retry: + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + search_base, search_end); + if (!crash_base) { + /* + * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. + */ + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && search_end == CRASH_ADDR_LOW_MAX) { + search_end = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + goto retry; + } + + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. + */ + if (high && search_end == CRASH_ADDR_HIGH_MAX) { + search_end = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; + } + + if ((crash_base >= CRASH_ADDR_LOW_MAX) && + crash_low_size && reserve_crashkernel_low(crash_low_size)) { + memblock_phys_free(crash_base, crash_size); + return; + } + + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + /* + * The crashkernel memory will be removed from the kernel linear + * map. Inform kmemleak so that it won't try to access it. + */ + kmemleak_ignore_phys(crash_base); + if (crashk_low_res.end) + kmemleak_ignore_phys(crashk_low_res.start); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static __init int insert_crashkernel_resources(void) +{ + if (crashk_res.start < crashk_res.end) + insert_resource(&iomem_resource, &crashk_res); + + if (crashk_low_res.start < crashk_low_res.end) + insert_resource(&iomem_resource, &crashk_low_res); + + return 0; +} +early_initcall(insert_crashkernel_resources); +#endif -- cgit v1.2.3 From 443cbaf9e2fdbef7d7cae457434a6cb8a679441b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Jan 2024 13:12:42 +0800 Subject: crash: split vmcoreinfo exporting code out from crash_core.c Now move the relevant codes into separate files: kernel/crash_reserve.c, include/linux/crash_reserve.h. And add config item CRASH_RESERVE to control its enabling. And also update the old ifdeffery of CONFIG_CRASH_CORE, including of and config item dependency on CRASH_CORE accordingly. And also do renaming as follows: - arch/xxx/kernel/{crash_core.c => vmcore_info.c} because they are only related to vmcoreinfo exporting on x86, arm64, riscv. And also Remove config item CRASH_CORE, and rely on CONFIG_KEXEC_CORE to decide if build in crash_core.c. [yang.lee@linux.alibaba.com: remove duplicated include in vmcore_info.c] Link: https://lkml.kernel.org/r/20240126005744.16561-1-yang.lee@linux.alibaba.com Link: https://lkml.kernel.org/r/20240124051254.67105-3-bhe@redhat.com Signed-off-by: Baoquan He Signed-off-by: Yang Li Acked-by: Hari Bathini Cc: Al Viro Cc: Eric W. Biederman Cc: Pingfan Liu Cc: Klara Modin Cc: Michael Kelley Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Yang Li Signed-off-by: Andrew Morton --- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/crash_core.c | 39 ----- arch/arm64/kernel/vmcore_info.c | 39 +++++ arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/platforms/powernv/opal-core.c | 2 +- arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/crash_core.c | 24 --- arch/riscv/kernel/vmcore_info.c | 24 +++ arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/crash_core_32.c | 17 --- arch/x86/kernel/crash_core_64.c | 24 --- arch/x86/kernel/vmcore_info_32.c | 17 +++ arch/x86/kernel/vmcore_info_64.c | 24 +++ drivers/firmware/qemu_fw_cfg.c | 14 +- fs/proc/Kconfig | 2 +- fs/proc/kcore.c | 2 +- include/linux/buildid.h | 2 +- include/linux/crash_core.h | 73 --------- include/linux/kexec.h | 1 + include/linux/vmcore_info.h | 81 ++++++++++ kernel/Kconfig.kexec | 4 +- kernel/Makefile | 4 +- kernel/crash_core.c | 206 -------------------------- kernel/ksysfs.c | 6 +- kernel/printk/printk.c | 4 +- kernel/vmcore_info.c | 230 +++++++++++++++++++++++++++++ lib/buildid.c | 2 +- 28 files changed, 442 insertions(+), 409 deletions(-) delete mode 100644 arch/arm64/kernel/crash_core.c create mode 100644 arch/arm64/kernel/vmcore_info.c delete mode 100644 arch/riscv/kernel/crash_core.c create mode 100644 arch/riscv/kernel/vmcore_info.c delete mode 100644 arch/x86/kernel/crash_core_32.c delete mode 100644 arch/x86/kernel/crash_core_64.c create mode 100644 arch/x86/kernel/vmcore_info_32.c create mode 100644 arch/x86/kernel/vmcore_info_64.c create mode 100644 include/linux/vmcore_info.h create mode 100644 kernel/vmcore_info.c (limited to 'arch/x86') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 467cb7117273..a3882cccf049 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -66,7 +66,7 @@ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c deleted file mode 100644 index 2a24199a9b81..000000000000 --- a/arch/arm64/kernel/crash_core.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) Linaro. - * Copyright (C) Huawei Futurewei Technologies. - */ - -#include -#include -#include -#include -#include - -static inline u64 get_tcr_el1_t1sz(void); - -static inline u64 get_tcr_el1_t1sz(void) -{ - return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; -} - -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_NUMBER(VA_BITS); - /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ - vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); - vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); - vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); - vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); - vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); - vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", - kimage_voffset); - vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", - PHYS_OFFSET); - vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n", - get_tcr_el1_t1sz()); - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", - system_supports_address_auth() ? - ptrauth_kernel_pac_mask() : 0); -} diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c new file mode 100644 index 000000000000..b19d5d6cb8b3 --- /dev/null +++ b/arch/arm64/kernel/vmcore_info.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + */ + +#include +#include +#include +#include +#include + +static inline u64 get_tcr_el1_t1sz(void); + +static inline u64 get_tcr_el1_t1sz(void) +{ + return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; +} + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); + vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n", + get_tcr_el1_t1sz()); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", + system_supports_address_auth() ? + ptrauth_kernel_pac_mask() : 0); +} diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7f704ae5c5ef..495d197c9b27 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -690,7 +690,7 @@ config ARCH_SELECTS_CRASH_DUMP config FA_DUMP bool "Firmware-assisted dump" depends on PPC64 && (PPC_RTAS || PPC_POWERNV) - select CRASH_CORE + select VMCORE_INFO select CRASH_RESERVE select CRASH_DUMP help diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9b142b9d5187..733f210ffda1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -109,7 +109,7 @@ int ppc_do_canonicalize_irqs; EXPORT_SYMBOL(ppc_do_canonicalize_irqs); #endif -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; #endif diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index bb7657115f1d..c9a9b759cc92 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f71910718053..d6fd8dcfceb5 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -92,7 +92,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c deleted file mode 100644 index d18d529fd9b9..000000000000 --- a/arch/riscv/kernel/crash_core.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_NUMBER(phys_ram_base); - - vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); - vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); -#ifdef CONFIG_MMU - VMCOREINFO_NUMBER(VA_BITS); - vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); - vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); -#ifdef CONFIG_64BIT - vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); - vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); -#endif -#endif - vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); - vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", - kernel_map.va_kernel_pa_offset); -} diff --git a/arch/riscv/kernel/vmcore_info.c b/arch/riscv/kernel/vmcore_info.c new file mode 100644 index 000000000000..6d7a22522d63 --- /dev/null +++ b/arch/riscv/kernel/vmcore_info.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(phys_ram_base); + + vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); +#ifdef CONFIG_64BIT + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif +#endif + vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); + vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", + kernel_map.va_kernel_pa_offset); +} diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0000325ab98f..913d4022131e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -98,7 +98,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_RETHOOK) += rethook.o -obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c deleted file mode 100644 index 8a89c109e20a..000000000000 --- a/arch/x86/kernel/crash_core_32.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -#include - -void arch_crash_save_vmcoreinfo(void) -{ -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifdef CONFIG_X86_PAE - VMCOREINFO_CONFIG(X86_PAE); -#endif -} diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c deleted file mode 100644 index 7d255f882afe..000000000000 --- a/arch/x86/kernel/crash_core_64.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -#include - -void arch_crash_save_vmcoreinfo(void) -{ - u64 sme_mask = sme_me_mask; - - VMCOREINFO_NUMBER(phys_base); - VMCOREINFO_SYMBOL(init_top_pgt); - vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", - pgtable_l5_enabled()); - -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); - VMCOREINFO_NUMBER(sme_mask); -} diff --git a/arch/x86/kernel/vmcore_info_32.c b/arch/x86/kernel/vmcore_info_32.c new file mode 100644 index 000000000000..5995a749288a --- /dev/null +++ b/arch/x86/kernel/vmcore_info_32.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifdef CONFIG_X86_PAE + VMCOREINFO_CONFIG(X86_PAE); +#endif +} diff --git a/arch/x86/kernel/vmcore_info_64.c b/arch/x86/kernel/vmcore_info_64.c new file mode 100644 index 000000000000..0dec7d868754 --- /dev/null +++ b/arch/x86/kernel/vmcore_info_64.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include + +void arch_crash_save_vmcoreinfo(void) +{ + u64 sme_mask = sme_me_mask; + + VMCOREINFO_NUMBER(phys_base); + VMCOREINFO_SYMBOL(init_top_pgt); + vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", + pgtable_l5_enabled()); + +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_NUMBER(sme_mask); +} diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 03da9a4354f8..5f43dfa22f79 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include MODULE_AUTHOR("Gabriel L. Somlo "); MODULE_DESCRIPTION("QEMU fw_cfg sysfs support"); @@ -67,7 +67,7 @@ static void fw_cfg_sel_endianness(u16 key) iowrite16(key, fw_cfg_reg_ctrl); } -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO static inline bool fw_cfg_dma_enabled(void) { return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma; @@ -156,7 +156,7 @@ static ssize_t fw_cfg_read_blob(u16 key, return count; } -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO /* write chunk of given fw_cfg blob (caller responsible for sanity-check) */ static ssize_t fw_cfg_write_blob(u16 key, void *buf, loff_t pos, size_t count) @@ -195,7 +195,7 @@ end: return ret; } -#endif /* CONFIG_CRASH_CORE */ +#endif /* CONFIG_VMCORE_INFO */ /* clean up fw_cfg device i/o */ static void fw_cfg_io_cleanup(void) @@ -319,7 +319,7 @@ struct fw_cfg_sysfs_entry { struct list_head list; }; -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f) { static struct fw_cfg_vmcoreinfo *data; @@ -343,7 +343,7 @@ static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f) kfree(data); return ret; } -#endif /* CONFIG_CRASH_CORE */ +#endif /* CONFIG_VMCORE_INFO */ /* get fw_cfg_sysfs_entry from kobject member */ static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj) @@ -583,7 +583,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) int err; struct fw_cfg_sysfs_entry *entry; -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO if (fw_cfg_dma_enabled() && strcmp(f->name, FW_CFG_VMCOREINFO_FILENAME) == 0 && !is_kdump_kernel()) { diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 32b1116ae137..d80a1431ef7b 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -32,7 +32,7 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU - select CRASH_CORE + select VMCORE_INFO help Provides a virtual ELF core file of the live kernel. This can be read with gdb and other ELF tools. No modifications can be diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 6422e569b080..8e08a9a1b7ed 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -10,7 +10,7 @@ * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar */ -#include +#include #include #include #include diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 8a582d242f06..20aa3c2d89f7 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -11,7 +11,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); #else diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 1fde49246fa6..7f19f62018ef 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -6,79 +6,6 @@ #include #include -#define CRASH_CORE_NOTE_NAME "CORE" -#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) -#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) - -/* - * The per-cpu notes area is a list of notes terminated by a "NULL" - * note header. For kdump, the code in vmcore.c runs in the context - * of the second kernel to combine them into one note. - */ -#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - CRASH_CORE_NOTE_NAME_BYTES + \ - CRASH_CORE_NOTE_DESC_BYTES) - -#define VMCOREINFO_BYTES PAGE_SIZE -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - VMCOREINFO_NOTE_NAME_BYTES + \ - VMCOREINFO_BYTES) - -typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; -/* Per cpu memory for storing cpu states in case of system crash. */ -extern note_buf_t __percpu *crash_notes; - -void crash_update_vmcoreinfo_safecopy(void *ptr); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); - -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_BUILD_ID() \ - ({ \ - static_assert(sizeof(vmlinux_build_id) == 20); \ - vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ - }) - -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ARRAY(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_TYPE_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) - -extern unsigned char *vmcoreinfo_data; -extern size_t vmcoreinfo_size; -extern u32 *vmcoreinfo_note; - -Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len); -void final_note(Elf_Word *buf); - /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6d79bfb52e5b..9c7bb8b56ed6 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -16,6 +16,7 @@ #if !defined(__ASSEMBLY__) #include +#include #include #include #include diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h new file mode 100644 index 000000000000..e1dec1a6a749 --- /dev/null +++ b/include/linux/vmcore_info.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VMCORE_INFO_H +#define LINUX_VMCORE_INFO_H + +#include +#include +#include + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +/* + * The per-cpu notes area is a list of notes terminated by a "NULL" + * note header. For kdump, the code in vmcore.c runs in the context + * of the second kernel to combine them into one note. + */ +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES PAGE_SIZE +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +/* Per cpu memory for storing cpu states in case of system crash. */ +extern note_buf_t __percpu *crash_notes; + +void crash_update_vmcoreinfo_safecopy(void *ptr); +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID() \ + ({ \ + static_assert(sizeof(vmlinux_build_id) == 20); \ + vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ + }) + +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_TYPE_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern unsigned char *vmcoreinfo_data; +extern size_t vmcoreinfo_size; +extern u32 *vmcoreinfo_note; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); +#endif /* LINUX_VMCORE_INFO_H */ diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 8b7be71edd85..8faf27043432 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -5,11 +5,11 @@ menu "Kexec and crash features" config CRASH_RESERVE bool -config CRASH_CORE +config VMCORE_INFO bool config KEXEC_CORE - select CRASH_CORE + select VMCORE_INFO select CRASH_RESERVE bool diff --git a/kernel/Makefile b/kernel/Makefile index 05fa88b3ab74..649272a1d6b9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -68,9 +68,9 @@ obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_KALLSYMS_SELFTEST) += kallsyms_selftest.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o -obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o -obj-$(CONFIG_KEXEC_CORE) += kexec_core.o +obj-$(CONFIG_KEXEC_CORE) += kexec_core.o crash_core.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o diff --git a/kernel/crash_core.c b/kernel/crash_core.c index ae0d1ce89b46..2f4df1fe6f7a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -26,14 +26,6 @@ /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; -/* vmcoreinfo stuff */ -unsigned char *vmcoreinfo_data; -size_t vmcoreinfo_size; -u32 *vmcoreinfo_note; - -/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ -static unsigned char *vmcoreinfo_data_safecopy; - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { @@ -195,204 +187,6 @@ int crash_exclude_mem_range(struct crash_mem *mem, return 0; } -Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len) -{ - struct elf_note *note = (struct elf_note *)buf; - - note->n_namesz = strlen(name) + 1; - note->n_descsz = data_len; - note->n_type = type; - buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); - memcpy(buf, name, note->n_namesz); - buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); - memcpy(buf, data, data_len); - buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); - - return buf; -} - -void final_note(Elf_Word *buf) -{ - memset(buf, 0, sizeof(struct elf_note)); -} - -static void update_vmcoreinfo_note(void) -{ - u32 *buf = vmcoreinfo_note; - - if (!vmcoreinfo_size) - return; - buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, - vmcoreinfo_size); - final_note(buf); -} - -void crash_update_vmcoreinfo_safecopy(void *ptr) -{ - if (ptr) - memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); - - vmcoreinfo_data_safecopy = ptr; -} - -void crash_save_vmcoreinfo(void) -{ - if (!vmcoreinfo_note) - return; - - /* Use the safe copy to generate vmcoreinfo note if have */ - if (vmcoreinfo_data_safecopy) - vmcoreinfo_data = vmcoreinfo_data_safecopy; - - vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); - update_vmcoreinfo_note(); -} - -void vmcoreinfo_append_str(const char *fmt, ...) -{ - va_list args; - char buf[0x50]; - size_t r; - - va_start(args, fmt); - r = vscnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - - r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); - - memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); - - vmcoreinfo_size += r; - - WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, - "vmcoreinfo data exceeds allocated size, truncating"); -} - -/* - * provide an empty default implementation here -- architecture - * code may override this - */ -void __weak arch_crash_save_vmcoreinfo(void) -{} - -phys_addr_t __weak paddr_vmcoreinfo_note(void) -{ - return __pa(vmcoreinfo_note); -} -EXPORT_SYMBOL(paddr_vmcoreinfo_note); - -static int __init crash_save_vmcoreinfo_init(void) -{ - vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); - if (!vmcoreinfo_data) { - pr_warn("Memory allocation for vmcoreinfo_data failed\n"); - return -ENOMEM; - } - - vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, - GFP_KERNEL | __GFP_ZERO); - if (!vmcoreinfo_note) { - free_page((unsigned long)vmcoreinfo_data); - vmcoreinfo_data = NULL; - pr_warn("Memory allocation for vmcoreinfo_note failed\n"); - return -ENOMEM; - } - - VMCOREINFO_OSRELEASE(init_uts_ns.name.release); - VMCOREINFO_BUILD_ID(); - VMCOREINFO_PAGESIZE(PAGE_SIZE); - - VMCOREINFO_SYMBOL(init_uts_ns); - VMCOREINFO_OFFSET(uts_namespace, name); - VMCOREINFO_SYMBOL(node_online_map); -#ifdef CONFIG_MMU - VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); -#endif - VMCOREINFO_SYMBOL(_stext); - vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", (unsigned long) VMALLOC_START); - -#ifndef CONFIG_NUMA - VMCOREINFO_SYMBOL(mem_map); - VMCOREINFO_SYMBOL(contig_page_data); -#endif -#ifdef CONFIG_SPARSEMEM - VMCOREINFO_SYMBOL_ARRAY(mem_section); - VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); - VMCOREINFO_STRUCT_SIZE(mem_section); - VMCOREINFO_OFFSET(mem_section, section_mem_map); - VMCOREINFO_NUMBER(SECTION_SIZE_BITS); - VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); -#endif - VMCOREINFO_STRUCT_SIZE(page); - VMCOREINFO_STRUCT_SIZE(pglist_data); - VMCOREINFO_STRUCT_SIZE(zone); - VMCOREINFO_STRUCT_SIZE(free_area); - VMCOREINFO_STRUCT_SIZE(list_head); - VMCOREINFO_SIZE(nodemask_t); - VMCOREINFO_OFFSET(page, flags); - VMCOREINFO_OFFSET(page, _refcount); - VMCOREINFO_OFFSET(page, mapping); - VMCOREINFO_OFFSET(page, lru); - VMCOREINFO_OFFSET(page, _mapcount); - VMCOREINFO_OFFSET(page, private); - VMCOREINFO_OFFSET(page, compound_head); - VMCOREINFO_OFFSET(pglist_data, node_zones); - VMCOREINFO_OFFSET(pglist_data, nr_zones); -#ifdef CONFIG_FLATMEM - VMCOREINFO_OFFSET(pglist_data, node_mem_map); -#endif - VMCOREINFO_OFFSET(pglist_data, node_start_pfn); - VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); - VMCOREINFO_OFFSET(pglist_data, node_id); - VMCOREINFO_OFFSET(zone, free_area); - VMCOREINFO_OFFSET(zone, vm_stat); - VMCOREINFO_OFFSET(zone, spanned_pages); - VMCOREINFO_OFFSET(free_area, free_list); - VMCOREINFO_OFFSET(list_head, next); - VMCOREINFO_OFFSET(list_head, prev); - VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS); - log_buf_vmcoreinfo_setup(); - VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); - VMCOREINFO_NUMBER(NR_FREE_PAGES); - VMCOREINFO_NUMBER(PG_lru); - VMCOREINFO_NUMBER(PG_private); - VMCOREINFO_NUMBER(PG_swapcache); - VMCOREINFO_NUMBER(PG_swapbacked); - VMCOREINFO_NUMBER(PG_slab); -#ifdef CONFIG_MEMORY_FAILURE - VMCOREINFO_NUMBER(PG_hwpoison); -#endif - VMCOREINFO_NUMBER(PG_head_mask); -#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) - VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); -#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) - VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif - -#ifdef CONFIG_KALLSYMS - VMCOREINFO_SYMBOL(kallsyms_names); - VMCOREINFO_SYMBOL(kallsyms_num_syms); - VMCOREINFO_SYMBOL(kallsyms_token_table); - VMCOREINFO_SYMBOL(kallsyms_token_index); -#ifdef CONFIG_KALLSYMS_BASE_RELATIVE - VMCOREINFO_SYMBOL(kallsyms_offsets); - VMCOREINFO_SYMBOL(kallsyms_relative_base); -#else - VMCOREINFO_SYMBOL(kallsyms_addresses); -#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ -#endif /* CONFIG_KALLSYMS */ - - arch_crash_save_vmcoreinfo(); - update_vmcoreinfo_note(); - - return 0; -} - -subsys_initcall(crash_save_vmcoreinfo_init); - static int __init crash_notes_memory_init(void) { /* Allocate memory for saving cpu registers. */ diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 1d4bc493b2f4..11526fc42bc2 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -154,7 +154,7 @@ KERNEL_ATTR_RW(kexec_crash_size); #endif /* CONFIG_KEXEC_CORE */ -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO static ssize_t vmcoreinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -177,7 +177,7 @@ KERNEL_ATTR_RO(crash_elfcorehdr_size); #endif -#endif /* CONFIG_CRASH_CORE */ +#endif /* CONFIG_VMCORE_INFO */ /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, @@ -265,7 +265,7 @@ static struct attribute * kernel_attrs[] = { &kexec_crash_loaded_attr.attr, &kexec_crash_size_attr.attr, #endif -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO &vmcoreinfo_attr.attr, #ifdef CONFIG_CRASH_HOTPLUG &crash_elfcorehdr_size_attr.attr, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f2444b581e16..7d74b000b43a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -951,7 +951,7 @@ const struct file_operations kmsg_fops = { .release = devkmsg_release, }; -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_VMCORE_INFO /* * This appends the listed symbols to /proc/vmcore * diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c new file mode 100644 index 000000000000..8f77e238a54f --- /dev/null +++ b/kernel/vmcore_info.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * crash.c - kernel crash support code. + * Copyright (C) 2002-2004 Eric Biederman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "kallsyms_internal.h" +#include "kexec_internal.h" + +/* vmcoreinfo stuff */ +unsigned char *vmcoreinfo_data; +size_t vmcoreinfo_size; +u32 *vmcoreinfo_note; + +/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ +static unsigned char *vmcoreinfo_data_safecopy; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); + memcpy(buf, name, note->n_namesz); + buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); + memcpy(buf, data, data_len); + buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); + + return buf; +} + +void final_note(Elf_Word *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +static void update_vmcoreinfo_note(void) +{ + u32 *buf = vmcoreinfo_note; + + if (!vmcoreinfo_size) + return; + buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, + vmcoreinfo_size); + final_note(buf); +} + +void crash_update_vmcoreinfo_safecopy(void *ptr) +{ + if (ptr) + memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); + + vmcoreinfo_data_safecopy = ptr; +} + +void crash_save_vmcoreinfo(void) +{ + if (!vmcoreinfo_note) + return; + + /* Use the safe copy to generate vmcoreinfo note if have */ + if (vmcoreinfo_data_safecopy) + vmcoreinfo_data = vmcoreinfo_data_safecopy; + + vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); + update_vmcoreinfo_note(); +} + +void vmcoreinfo_append_str(const char *fmt, ...) +{ + va_list args; + char buf[0x50]; + size_t r; + + va_start(args, fmt); + r = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); + + memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); + + vmcoreinfo_size += r; + + WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, + "vmcoreinfo data exceeds allocated size, truncating"); +} + +/* + * provide an empty default implementation here -- architecture + * code may override this + */ +void __weak arch_crash_save_vmcoreinfo(void) +{} + +phys_addr_t __weak paddr_vmcoreinfo_note(void) +{ + return __pa(vmcoreinfo_note); +} +EXPORT_SYMBOL(paddr_vmcoreinfo_note); + +static int __init crash_save_vmcoreinfo_init(void) +{ + vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); + if (!vmcoreinfo_data) { + pr_warn("Memory allocation for vmcoreinfo_data failed\n"); + return -ENOMEM; + } + + vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, + GFP_KERNEL | __GFP_ZERO); + if (!vmcoreinfo_note) { + free_page((unsigned long)vmcoreinfo_data); + vmcoreinfo_data = NULL; + pr_warn("Memory allocation for vmcoreinfo_note failed\n"); + return -ENOMEM; + } + + VMCOREINFO_OSRELEASE(init_uts_ns.name.release); + VMCOREINFO_BUILD_ID(); + VMCOREINFO_PAGESIZE(PAGE_SIZE); + + VMCOREINFO_SYMBOL(init_uts_ns); + VMCOREINFO_OFFSET(uts_namespace, name); + VMCOREINFO_SYMBOL(node_online_map); +#ifdef CONFIG_MMU + VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); +#endif + VMCOREINFO_SYMBOL(_stext); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", (unsigned long) VMALLOC_START); + +#ifndef CONFIG_NUMA + VMCOREINFO_SYMBOL(mem_map); + VMCOREINFO_SYMBOL(contig_page_data); +#endif +#ifdef CONFIG_SPARSEMEM + VMCOREINFO_SYMBOL_ARRAY(mem_section); + VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); + VMCOREINFO_STRUCT_SIZE(mem_section); + VMCOREINFO_OFFSET(mem_section, section_mem_map); + VMCOREINFO_NUMBER(SECTION_SIZE_BITS); + VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); +#endif + VMCOREINFO_STRUCT_SIZE(page); + VMCOREINFO_STRUCT_SIZE(pglist_data); + VMCOREINFO_STRUCT_SIZE(zone); + VMCOREINFO_STRUCT_SIZE(free_area); + VMCOREINFO_STRUCT_SIZE(list_head); + VMCOREINFO_SIZE(nodemask_t); + VMCOREINFO_OFFSET(page, flags); + VMCOREINFO_OFFSET(page, _refcount); + VMCOREINFO_OFFSET(page, mapping); + VMCOREINFO_OFFSET(page, lru); + VMCOREINFO_OFFSET(page, _mapcount); + VMCOREINFO_OFFSET(page, private); + VMCOREINFO_OFFSET(page, compound_head); + VMCOREINFO_OFFSET(pglist_data, node_zones); + VMCOREINFO_OFFSET(pglist_data, nr_zones); +#ifdef CONFIG_FLATMEM + VMCOREINFO_OFFSET(pglist_data, node_mem_map); +#endif + VMCOREINFO_OFFSET(pglist_data, node_start_pfn); + VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); + VMCOREINFO_OFFSET(pglist_data, node_id); + VMCOREINFO_OFFSET(zone, free_area); + VMCOREINFO_OFFSET(zone, vm_stat); + VMCOREINFO_OFFSET(zone, spanned_pages); + VMCOREINFO_OFFSET(free_area, free_list); + VMCOREINFO_OFFSET(list_head, next); + VMCOREINFO_OFFSET(list_head, prev); + VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS); + log_buf_vmcoreinfo_setup(); + VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); + VMCOREINFO_NUMBER(NR_FREE_PAGES); + VMCOREINFO_NUMBER(PG_lru); + VMCOREINFO_NUMBER(PG_private); + VMCOREINFO_NUMBER(PG_swapcache); + VMCOREINFO_NUMBER(PG_swapbacked); + VMCOREINFO_NUMBER(PG_slab); +#ifdef CONFIG_MEMORY_FAILURE + VMCOREINFO_NUMBER(PG_hwpoison); +#endif + VMCOREINFO_NUMBER(PG_head_mask); +#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); +#ifdef CONFIG_HUGETLB_PAGE + VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) + VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); +#endif + +#ifdef CONFIG_KALLSYMS + VMCOREINFO_SYMBOL(kallsyms_names); + VMCOREINFO_SYMBOL(kallsyms_num_syms); + VMCOREINFO_SYMBOL(kallsyms_token_table); + VMCOREINFO_SYMBOL(kallsyms_token_index); +#ifdef CONFIG_KALLSYMS_BASE_RELATIVE + VMCOREINFO_SYMBOL(kallsyms_offsets); + VMCOREINFO_SYMBOL(kallsyms_relative_base); +#else + VMCOREINFO_SYMBOL(kallsyms_addresses); +#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ +#endif /* CONFIG_KALLSYMS */ + + arch_crash_save_vmcoreinfo(); + update_vmcoreinfo_note(); + + return 0; +} + +subsys_initcall(crash_save_vmcoreinfo_init); diff --git a/lib/buildid.c b/lib/buildid.c index e3a7acdeef0e..3e6868c86b45 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -174,7 +174,7 @@ int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size) return parse_build_id_buf(build_id, NULL, buf, buf_size); } -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init; /** -- cgit v1.2.3 From a4eeb2176d89fdf2785851521577b94b31690a60 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Jan 2024 13:12:46 +0800 Subject: x86, crash: wrap crash dumping code into crash related ifdefs Now crash codes under kernel/ folder has been split out from kexec code, crash dumping can be separated from kexec reboot in config items on x86 with some adjustments. Here, also change some ifdefs or IS_ENABLED() check to more appropriate ones, e,g - #ifdef CONFIG_KEXEC_CORE -> #ifdef CONFIG_CRASH_DUMP - (!IS_ENABLED(CONFIG_KEXEC_CORE)) - > (!IS_ENABLED(CONFIG_CRASH_RESERVE)) [bhe@redhat.com: don't nest CONFIG_CRASH_DUMP ifdef inside CONFIG_KEXEC_CODE ifdef scope] Link: https://lore.kernel.org/all/SN6PR02MB4157931105FA68D72E3D3DB8D47B2@SN6PR02MB4157.namprd02.prod.outlook.com/T/#u Link: https://lkml.kernel.org/r/20240124051254.67105-7-bhe@redhat.com Signed-off-by: Baoquan He Cc: Al Viro Cc: Eric W. Biederman Cc: Hari Bathini Cc: Pingfan Liu Cc: Klara Modin Cc: Michael Kelley Cc: Nathan Chancellor Cc: Stephen Rothwell Cc: Yang Li Signed-off-by: Andrew Morton --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++-- arch/x86/kernel/kexec-bzimage64.c | 4 ++++ arch/x86/kernel/kvm.c | 4 ++-- arch/x86/kernel/machine_kexec_64.c | 3 +++ arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/xen/enlighten_hvm.c | 4 ++++ arch/x86/xen/mmu_pv.c | 2 +- 10 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 913d4022131e..3668b1edef2d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,9 +100,9 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o +obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o crash.o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_X86_32) += doublefault_32.o diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 01fa06dd06b6..2e8cd5a4ae85 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -209,7 +209,9 @@ static void hv_machine_shutdown(void) if (kexec_in_progress) hyperv_cleanup(); } +#endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_CRASH_DUMP static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) @@ -221,7 +223,7 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) /* Disable the hypercall page when there is only 1 active CPU. */ hyperv_cleanup(); } -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_CRASH_DUMP */ #endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) @@ -495,9 +497,13 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif -#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) +#if IS_ENABLED(CONFIG_HYPERV) +#if defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; +#endif +#if defined(CONFIG_CRASH_DUMP) machine_ops.crash_shutdown = hv_machine_crash_shutdown; +#endif #endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 2a422e00ed4b..b55737b83a84 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -263,11 +263,13 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, memset(¶ms->hd0_info, 0, sizeof(params->hd0_info)); memset(¶ms->hd1_info, 0, sizeof(params->hd1_info)); +#ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { ret = crash_setup_memmap_entries(image, params); if (ret) return ret; } else +#endif setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -433,12 +435,14 @@ static void *bzImage64_load(struct kimage *image, char *kernel, return ERR_PTR(-EINVAL); } +#ifdef CONFIG_CRASH_DUMP /* Allocate and load backup region */ if (image->type == KEXEC_TYPE_CRASH) { ret = crash_load_segments(image); if (ret) return ERR_PTR(ret); } +#endif /* * Load purgatory. For 64bit entry point, purgatory code can be diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 428ee74002e1..3c9c327d6706 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -770,7 +770,7 @@ static struct notifier_block kvm_pv_reboot_nb = { * won't be valid. In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written. */ -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP static void kvm_crash_shutdown(struct pt_regs *regs) { kvm_guest_cpu_offline(true); @@ -853,7 +853,7 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = kvm_crash_shutdown; #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index bc0a5348b4a6..b180d8e497c3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -508,6 +508,8 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif /* CONFIG_KEXEC_FILE */ +#ifdef CONFIG_CRASH_DUMP + static int kexec_mark_range(unsigned long start, unsigned long end, bool protect) { @@ -552,6 +554,7 @@ void arch_kexec_unprotect_crashkres(void) { kexec_mark_crashkres(false); } +#endif /* * During a traditional boot under SME, SME will encrypt the kernel, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 830425e6d38e..f3130f762784 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -796,7 +796,7 @@ struct machine_ops machine_ops __ro_after_init = { .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -826,7 +826,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84201071dfac..899d839a2954 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -471,7 +471,7 @@ static void __init arch_reserve_crashkernel(void) bool high = false; int ret; - if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + if (!IS_ENABLED(CONFIG_CRASH_RESERVE)) return; ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96a771f9f930..52c3823b7211 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -282,7 +282,7 @@ struct smp_ops smp_ops = { .smp_cpus_done = native_smp_cpus_done, .stop_other_cpus = native_stop_other_cpus, -#if defined(CONFIG_KEXEC_CORE) +#if defined(CONFIG_CRASH_DUMP) .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, #endif .smp_send_reschedule = native_smp_send_reschedule, diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 3f8c34707c50..0b367c1e086d 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -148,7 +148,9 @@ static void xen_hvm_shutdown(void) if (kexec_in_progress) xen_reboot(SHUTDOWN_soft_reset); } +#endif +#ifdef CONFIG_CRASH_DUMP static void xen_hvm_crash_shutdown(struct pt_regs *regs) { native_machine_crash_shutdown(regs); @@ -236,6 +238,8 @@ static void __init xen_hvm_guest_init(void) #ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; +#endif +#ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = xen_hvm_crash_shutdown; #endif } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 218773cfb009..e21974f2cf2d 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2520,7 +2520,7 @@ out: } EXPORT_SYMBOL_GPL(xen_remap_pfn); -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_VMCORE_INFO phys_addr_t paddr_vmcoreinfo_note(void) { if (xen_pv_domain()) -- cgit v1.2.3 From cd87d9f58439a114f38cec276f3ec1600729c8bf Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 22 Feb 2024 19:09:10 +0000 Subject: x86/mm: further clarify switch_mm_irqs_off() documentation Commit accf6b23d1e5a ("x86/mm: clarify "prev" usage in switch_mm_irqs_off()") attempted to clarify x86's usage of the arguments passed by generic code, specifically the "prev" argument the is unused by x86. However, it could have done a better job with the comment above switch_mm_irqs_off(). Rewrite this comment according to Dave Hansen's suggestion. Link: https://lkml.kernel.org/r/20240222190911.1903054-1-yosryahmed@google.com Fixes: 3cfd6625a6cf ("x86/mm: clarify "prev" usage in switch_mm_irqs_off()") Signed-off-by: Yosry Ahmed Suggested-by: Dave Hansen Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov (AMD) Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/mm/tlb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bf9605caf24f..b67545baf697 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -493,10 +493,10 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm) { } #endif /* - * The "prev" argument passed by the caller does not always match CR3. For - * example, the scheduler passes in active_mm when switching from lazy TLB mode - * to normal mode, but switch_mm_irqs_off() can be called from x86 code without - * updating active_mm. Use cpu_tlbstate.loaded_mm instead. + * This optimizes when not actually switching mm's. Some architectures use the + * 'unused' argument for this optimization, but x86 must use + * 'cpu_tlbstate.loaded_mm' instead because it does not always keep + * 'current->active_mm' up to date. */ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, struct task_struct *tsk) -- cgit v1.2.3 From 15d1ec74b5d7054bba97e8be0b4407cd74976cbd Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 22 Feb 2024 19:09:11 +0000 Subject: x86/mm: always pass NULL as the first argument of switch_mm_irqs_off() The first argument of switch_mm_irqs_off() is unused by the x86 implementation. Make sure that x86 code never passes a non-NULL value to make this clear. Update the only non violating caller, switch_mm(). Link: https://lkml.kernel.org/r/20240222190911.1903054-2-yosryahmed@google.com Signed-off-by: Yosry Ahmed Suggested-by: Dave Hansen Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov (AMD) Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index b67545baf697..51f9f5694105 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -327,7 +327,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, unsigned long flags; local_irq_save(flags); - switch_mm_irqs_off(prev, next, tsk); + switch_mm_irqs_off(NULL, next, tsk); local_irq_restore(flags); } -- cgit v1.2.3 From dba8e6f34f07674e7b6e33d74b4ff35c82abb13a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:43 +0800 Subject: mm/x86: replace p4d_large() with p4d_leaf() p4d_large() is always defined as p4d_leaf(). Merge their usages. Chose p4d_leaf() because p4d_leaf() is a global API, while p4d_large() is not. Only x86 has p4d_leaf() defined as of now. So it also means after this patch we removed all p4d_large() usages. Link: https://lkml.kernel.org/r/20240305043750.93762-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport (IBM) Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Christophe Leroy Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/x86/mm/fault.c | 4 ++-- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/pat/set_memory.c | 4 ++-- arch/x86/mm/pti.c | 2 +- arch/x86/power/hibernate.c | 2 +- arch/x86/xen/mmu_pv.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241..8b69ce3f4115 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -368,7 +368,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("P4D %lx ", p4d_val(*p4d)); - if (!p4d_present(*p4d) || p4d_large(*p4d)) + if (!p4d_present(*p4d) || p4d_leaf(*p4d)) goto out; pud = pud_offset(p4d, address); @@ -1039,7 +1039,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) if (!p4d_present(*p4d)) return 0; - if (p4d_large(*p4d)) + if (p4d_leaf(*p4d)) return spurious_kernel_fault_check(error_code, (pte_t *) p4d); pud = pud_offset(p4d, address); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ebdbcae48011..d691e7992a9a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1197,7 +1197,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, if (!p4d_present(*p4d)) continue; - BUILD_BUG_ON(p4d_large(*p4d)); + BUILD_BUG_ON(p4d_leaf(*p4d)); pud_base = pud_offset(p4d, 0); remove_pud_table(pud_base, addr, next, altmap, direct); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index e9b448d1b1b7..5359a9c88099 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -676,7 +676,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, return NULL; *level = PG_LEVEL_512G; - if (p4d_large(*p4d) || !p4d_present(*p4d)) + if (p4d_leaf(*p4d) || !p4d_present(*p4d)) return (pte_t *)p4d; pud = pud_offset(p4d, address); @@ -739,7 +739,7 @@ pmd_t *lookup_pmd_address(unsigned long address) return NULL; p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d)) + if (p4d_none(*p4d) || p4d_leaf(*p4d) || !p4d_present(*p4d)) return NULL; pud = pud_offset(p4d, address); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 669ba1c345b3..dc0a81f5f60e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -206,7 +206,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!p4d) return NULL; - BUILD_BUG_ON(p4d_large(*p4d) != 0); + BUILD_BUG_ON(p4d_leaf(*p4d) != 0); if (p4d_none(*p4d)) { unsigned long new_pud_page = __get_free_page(gfp); if (WARN_ON_ONCE(!new_pud_page)) diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index 6f955eb1e163..28153789f873 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -165,7 +165,7 @@ int relocate_restore_code(void) pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(relocated_restore_code); p4d = p4d_offset(pgd, relocated_restore_code); - if (p4d_large(*p4d)) { + if (p4d_leaf(*p4d)) { set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); goto out; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index e21974f2cf2d..12a43a4abebf 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1104,7 +1104,7 @@ static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin) pud_t *pud_tbl; int i; - if (p4d_large(*p4d)) { + if (p4d_leaf(*p4d)) { pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK; xen_free_ro_pages(pa, P4D_SIZE); return; -- cgit v1.2.3 From 83ea65da325c643bd1fea078fb81c5415ed7a83a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:44 +0800 Subject: mm/x86: replace pgd_large() with pgd_leaf() pgd_leaf() is a global API while pgd_large() is not. Always use the global pgd_leaf(), then drop pgd_large(). Link: https://lkml.kernel.org/r/20240305043750.93762-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport (IBM) Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Christophe Leroy Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/mm/pti.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 69ed0ea0641b..d6e993a5659f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1418,8 +1418,8 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } -#define pgd_leaf pgd_large -static inline int pgd_large(pgd_t pgd) { return 0; } +#define pgd_leaf pgd_leaf +static inline int pgd_leaf(pgd_t pgd) { return 0; } #ifdef CONFIG_PAGE_TABLE_ISOLATION /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index dc0a81f5f60e..c17aab24c1b3 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -185,7 +185,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); } - BUILD_BUG_ON(pgd_large(*pgd) != 0); + BUILD_BUG_ON(pgd_leaf(*pgd) != 0); return p4d_offset(pgd, address); } -- cgit v1.2.3 From 924bd6a8c96767a05323d575bdefd664631dce73 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:45 +0800 Subject: mm/x86: drop two unnecessary pud_leaf() definitions pud_leaf() has a fallback macro defined in include/linux/pgtable.h already. Drop the extra two for x86. Link: https://lkml.kernel.org/r/20240305043750.93762-6-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Acked-by: Thomas Gleixner Reviewed-by: Mike Rapoport (IBM) Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Christophe Leroy Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 1 - include/asm-generic/pgtable-nopmd.h | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d6e993a5659f..9db7a38a0e9f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1097,7 +1097,6 @@ static inline int pud_bad(pud_t pud) return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #else -#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return 0; diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 8ffd64e7a24c..fa27e16bbe1b 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -31,7 +31,6 @@ static inline int pud_none(pud_t pud) { return 0; } static inline int pud_bad(pud_t pud) { return 0; } static inline int pud_present(pud_t pud) { return 1; } static inline int pud_user(pud_t pud) { return 0; } -static inline int pud_leaf(pud_t pud) { return 0; } static inline void pud_clear(pud_t *pud) { } #define pmd_ERROR(pmd) (pud_ERROR((pmd).pud)) -- cgit v1.2.3 From 2f709f7bfd3d13f8878070a79e0983b5fac2225f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:47 +0800 Subject: mm/treewide: replace pmd_large() with pmd_leaf() pmd_large() is always defined as pmd_leaf(). Merge their usages. Chose pmd_leaf() because pmd_leaf() is a global API, while pmd_large() is not. Link: https://lkml.kernel.org/r/20240305043750.93762-8-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport (IBM) Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/arm/mm/dump.c | 4 ++-- arch/powerpc/mm/book3s64/pgtable.c | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- arch/powerpc/mm/pgtable_64.c | 2 +- arch/s390/boot/vmem.c | 2 +- arch/s390/include/asm/pgtable.h | 8 ++++---- arch/s390/mm/gmap.c | 12 ++++++------ arch/s390/mm/hugetlbpage.c | 2 +- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgtable.c | 6 +++--- arch/s390/mm/vmem.c | 6 +++--- arch/sparc/mm/init_64.c | 4 ++-- arch/x86/boot/compressed/ident_map_64.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/mm/fault.c | 8 ++++---- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 8 ++++---- arch/x86/mm/kasan_init_64.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 4 ++-- arch/x86/mm/pat/set_memory.c | 4 ++-- arch/x86/mm/pgtable.c | 2 +- arch/x86/mm/pti.c | 4 ++-- arch/x86/power/hibernate.c | 2 +- arch/x86/xen/mmu_pv.c | 4 ++-- drivers/misc/sgi-gru/grufault.c | 2 +- 25 files changed, 49 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index a9381095ab36..cd032522d902 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -349,12 +349,12 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; domain = get_domain_name(pmd); - if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) + if (pmd_none(*pmd) || pmd_leaf(*pmd) || !pmd_present(*pmd)) note_page(st, addr, 4, pmd_val(*pmd), domain); else walk_pte(st, pmd, addr, domain); - if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) { + if (SECTION_SIZE < PMD_SIZE && pmd_leaf(pmd[1])) { addr += SECTION_SIZE; pmd++; domain = get_domain_name(pmd); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 3438ab72c346..45f526547b27 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -113,7 +113,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); assert_spin_locked(pmd_lockptr(mm, pmdp)); - WARN_ON(!(pmd_large(pmd))); + WARN_ON(!(pmd_leaf(pmd))); #endif trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 1f8db10693e3..5cc4008329be 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -924,7 +924,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) { - int large = pmd_large(*pmdp); + int large = pmd_leaf(*pmdp); if (large) vmemmap_verify(pmdp_ptep(pmdp), node, addr, next); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 386c6b06eab7..9b99113cb51a 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -132,7 +132,7 @@ struct page *pmd_page(pmd_t pmd) * enabled so these checks can't be used. */ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); + VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index e3a4500a5a75..348ab02b1028 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -333,7 +333,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e } pte = boot_pte_alloc(); pmd_populate(&init_mm, pmd, pte); - } else if (pmd_large(*pmd)) { + } else if (pmd_leaf(*pmd)) { continue; } pgtable_pte_populate(pmd, addr, next, mode); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4b91e65c85d9..431d03d5116b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -721,7 +721,7 @@ static inline int pmd_large(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd)) + if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_leaf(pmd)) return 1; return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -820,8 +820,8 @@ static inline int pte_protnone(pte_t pte) static inline int pmd_protnone(pmd_t pmd) { - /* pmd_large(pmd) implies pmd_present(pmd) */ - return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ); + /* pmd_leaf(pmd) implies pmd_present(pmd) */ + return pmd_leaf(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ); } #endif @@ -1385,7 +1385,7 @@ static inline unsigned long pmd_deref(pmd_t pmd) unsigned long origin_mask; origin_mask = _SEGMENT_ENTRY_ORIGIN; - if (pmd_large(pmd)) + if (pmd_leaf(pmd)) origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; return (unsigned long)__va(pmd_val(pmd) & origin_mask); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index e43a5a3befd4..767c6f077b53 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -603,7 +603,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); /* Are we allowed to use huge pages? */ - if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) + if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); @@ -615,7 +615,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) rc = radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT, table); if (!rc) { - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) | _SEGMENT_ENTRY_GMAP_UC; @@ -945,7 +945,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) } /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ - if (!pmd_large(*pmdp)) + if (!pmd_leaf(*pmdp)) spin_unlock(&gmap->guest_table_lock); return pmdp; } @@ -957,7 +957,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) */ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) { - if (pmd_large(*pmdp)) + if (pmd_leaf(*pmdp)) spin_unlock(&gmap->guest_table_lock); } @@ -1068,7 +1068,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, rc = -EAGAIN; pmdp = gmap_pmd_op_walk(gmap, gaddr); if (pmdp) { - if (!pmd_large(*pmdp)) { + if (!pmd_leaf(*pmdp)) { rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); if (!rc) { @@ -2500,7 +2500,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], if (!pmdp) return; - if (pmd_large(*pmdp)) { + if (pmd_leaf(*pmdp)) { if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr)) bitmap_fill(bitmap, _PAGE_ENTRIES); } else { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 297a6d897d5a..1ccb5b40fe92 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -235,7 +235,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, int pmd_huge(pmd_t pmd) { - return pmd_large(pmd); + return pmd_leaf(pmd); } int pud_huge(pud_t pud) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 631e3a4ee2de..9f55d5a3210c 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -185,7 +185,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, if (pmd_none(*pmdp)) return -EINVAL; next = pmd_addr_end(addr, end); - if (pmd_large(*pmdp)) { + if (pmd_leaf(*pmdp)) { need_split = !!(flags & SET_MEMORY_4K); need_split |= !!(addr & ~PMD_MASK); need_split |= !!(addr + PMD_SIZE > next); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b71432b15d66..9ac66304d776 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -827,7 +827,7 @@ again: return key ? -EFAULT : 0; } - if (pmd_large(*pmdp)) { + if (pmd_leaf(*pmdp)) { paddr = pmd_val(*pmdp) & HPAGE_MASK; paddr |= addr & ~HPAGE_MASK; /* @@ -938,7 +938,7 @@ again: return 0; } - if (pmd_large(*pmdp)) { + if (pmd_leaf(*pmdp)) { paddr = pmd_val(*pmdp) & HPAGE_MASK; paddr |= addr & ~HPAGE_MASK; cc = page_reset_referenced(paddr); @@ -1002,7 +1002,7 @@ again: return 0; } - if (pmd_large(*pmdp)) { + if (pmd_leaf(*pmdp)) { paddr = pmd_val(*pmdp) & HPAGE_MASK; paddr |= addr & ~HPAGE_MASK; *key = page_get_storage_key(paddr); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index eb100479f7be..afe5edf2a604 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -236,7 +236,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!add) { if (pmd_none(*pmd)) continue; - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) @@ -281,7 +281,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!pte) goto out; pmd_populate(&init_mm, pmd, pte); - } else if (pmd_large(*pmd)) { + } else if (pmd_leaf(*pmd)) { if (!direct) vmemmap_use_sub_pmd(addr, next); continue; @@ -610,7 +610,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) if (!pte) goto out; pmd_populate(&init_mm, pmd, pte); - } else if (WARN_ON_ONCE(pmd_large(*pmd))) { + } else if (WARN_ON_ONCE(pmd_leaf(*pmd))) { goto out; } ptep = pte_offset_kernel(pmd, addr); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f83017992eaa..5e067b6a4464 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1672,7 +1672,7 @@ bool kern_addr_valid(unsigned long addr) if (pmd_none(*pmd)) return false; - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) return pfn_valid(pmd_pfn(*pmd)); pte = pte_offset_kernel(pmd, addr); @@ -2968,7 +2968,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm; pmd_t entry = *pmd; - if (!pmd_large(entry) || !pmd_young(entry)) + if (!pmd_leaf(entry) || !pmd_young(entry)) return; pte = pmd_val(entry); diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d040080d7edb..71c6e2fdcec7 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -284,7 +284,7 @@ static int set_clr_page_flags(struct x86_mapping_info *info, pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); - if (pmd_large(*pmdp)) + if (pmd_leaf(*pmdp)) ptep = split_large_pmd(info, pmdp, address); else ptep = pte_offset_kernel(pmdp, address); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2d6cdeab1f8a..c15123248c52 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3135,7 +3135,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, if (pmd_none(pmd) || !pmd_present(pmd)) goto out; - if (pmd_large(pmd)) + if (pmd_leaf(pmd)) level = PG_LEVEL_2M; out: diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8b69ce3f4115..09417f950343 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -250,7 +250,7 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; - if (pmd_large(*pmd_k)) + if (pmd_leaf(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); @@ -319,7 +319,7 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); @@ -384,7 +384,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd) || pmd_large(*pmd)) + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); @@ -1053,7 +1053,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) if (!pmd_present(*pmd)) return 0; - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) return spurious_kernel_fault_check(error_code, (pte_t *) pmd); pte = pte_offset_kernel(pmd, address); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5c736b707cae..ac41b1e0940d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -463,7 +463,7 @@ void __init native_pagetable_init(void) break; /* should not be large page here */ - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n", pfn, pmd, __pa(pmd)); BUG_ON(1); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d691e7992a9a..2c5490e58f41 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -530,7 +530,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, } if (!pmd_none(*pmd)) { - if (!pmd_large(*pmd)) { + if (!pmd_leaf(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); paddr_last = phys_pte_init(pte, paddr, @@ -1114,7 +1114,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!pmd_present(*pmd)) continue; - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) @@ -1520,9 +1520,9 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int large = pmd_large(*pmd); + int large = pmd_leaf(*pmd); - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { vmemmap_verify((pte_t *)pmd, node, addr, next); vmemmap_use_sub_pmd(addr, next); } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0302491d799d..f41d26bc9161 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -95,7 +95,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (!pmd_large(*pmd)) + if (!pmd_leaf(*pmd)) kasan_populate_pmd(pmd, addr, next, nid); } while (pmd++, addr = next, addr != end); } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index d73aeb16417f..bca4fea80579 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -161,7 +161,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) return; pmd = pmd_offset(pud, ppd->vaddr); - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) return; set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); @@ -185,7 +185,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); } - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) return; pte = pte_offset_kernel(pmd, ppd->vaddr); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 5359a9c88099..b4037fe08eed 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -692,7 +692,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, return NULL; *level = PG_LEVEL_2M; - if (pmd_large(*pmd) || !pmd_present(*pmd)) + if (pmd_leaf(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; *level = PG_LEVEL_4K; @@ -1229,7 +1229,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) * Try to unmap in 2M chunks. */ while (end - start >= PMD_SIZE) { - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) pmd_clear(pmd); else __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0cbc1b8e8e3d..d05dd86ceb41 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -792,7 +792,7 @@ int pud_clear_huge(pud_t *pud) */ int pmd_clear_huge(pmd_t *pmd) { - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { pmd_clear(pmd); return 1; } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index c17aab24c1b3..0442e8f479a6 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -252,7 +252,7 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) return NULL; /* We can't do anything sensible if we hit a large mapping. */ - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { WARN_ON(1); return NULL; } @@ -341,7 +341,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, continue; } - if (pmd_large(*pmd) || level == PTI_CLONE_PMD) { + if (pmd_leaf(*pmd) || level == PTI_CLONE_PMD) { target_pmd = pti_user_pagetable_walk_pmd(addr); if (WARN_ON(!target_pmd)) return; diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index 28153789f873..277eaf610e0e 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -175,7 +175,7 @@ int relocate_restore_code(void) goto out; } pmd = pmd_offset(pud, relocated_restore_code); - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); goto out; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 12a43a4abebf..dde551bbd231 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1059,7 +1059,7 @@ static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin) pte_t *pte_tbl; int i; - if (pmd_large(*pmd)) { + if (pmd_leaf(*pmd)) { pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK; xen_free_ro_pages(pa, PMD_SIZE); return; @@ -1871,7 +1871,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) if (!pmd_present(pmd)) return 0; pa = pmd_val(pmd) & PTE_PFN_MASK; - if (pmd_large(pmd)) + if (pmd_leaf(pmd)) return pa + (vaddr & ~PMD_MASK); pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) * diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 629edb6486de..3557d78ee47a 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -227,7 +227,7 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, if (unlikely(pmd_none(*pmdp))) goto err; #ifdef CONFIG_X86_64 - if (unlikely(pmd_large(*pmdp))) + if (unlikely(pmd_leaf(*pmdp))) pte = ptep_get((pte_t *)pmdp); else #endif -- cgit v1.2.3 From 0a845e0f6348ccfa2dcc8c450ffd1c9ffe8c4add Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:48 +0800 Subject: mm/treewide: replace pud_large() with pud_leaf() pud_large() is always defined as pud_leaf(). Merge their usages. Chose pud_leaf() because pud_leaf() is a global API, while pud_large() is not. Link: https://lkml.kernel.org/r/20240305043750.93762-9-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/powerpc/mm/book3s64/pgtable.c | 2 +- arch/s390/boot/vmem.c | 2 +- arch/s390/include/asm/pgtable.h | 4 ++-- arch/s390/mm/gmap.c | 2 +- arch/s390/mm/hugetlbpage.c | 4 ++-- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgtable.c | 2 +- arch/s390/mm/vmem.c | 6 +++--- arch/sparc/mm/init_64.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/mm/fault.c | 4 ++-- arch/x86/mm/ident_map.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/mm/kasan_init_64.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 2 +- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pgtable.c | 2 +- arch/x86/mm/pti.c | 2 +- arch/x86/power/hibernate.c | 2 +- arch/x86/xen/mmu_pv.c | 4 ++-- 20 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 45f526547b27..83823db3488b 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, WARN_ON(pte_hw_valid(pud_pte(*pudp))); assert_spin_locked(pud_lockptr(mm, pudp)); - WARN_ON(!(pud_large(pud))); + WARN_ON(!(pud_leaf(pud))); #endif trace_hugepage_set_pud(addr, pud_val(pud)); return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 348ab02b1028..09b10bb6e4d0 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -366,7 +366,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e } pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) { + } else if (pud_leaf(*pud)) { continue; } pgtable_pmd_populate(pud, addr, next, mode); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 431d03d5116b..a5f16a244a64 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -730,7 +730,7 @@ static inline int pud_bad(pud_t pud) { unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; - if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) + if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud)) return 1; if (type < _REGION_ENTRY_TYPE_R3) return 0; @@ -1400,7 +1400,7 @@ static inline unsigned long pud_deref(pud_t pud) unsigned long origin_mask; origin_mask = _REGION_ENTRY_ORIGIN; - if (pud_large(pud)) + if (pud_leaf(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; return (unsigned long)__va(pud_val(pud) & origin_mask); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 767c6f077b53..094b43b121cd 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -598,7 +598,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) pud = pud_offset(p4d, vmaddr); VM_BUG_ON(pud_none(*pud)); /* large puds cannot yet be handled */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1ccb5b40fe92..c2e8242bd15d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); if (pud_present(*pudp)) { - if (pud_large(*pudp)) + if (pud_leaf(*pudp)) return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); } @@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return pud_large(pud); + return pud_leaf(pud); } bool __init arch_hugetlb_valid_size(unsigned long size) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 9f55d5a3210c..01bc8fad64d6 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, if (pud_none(*pudp)) return -EINVAL; next = pud_addr_end(addr, end); - if (pud_large(*pudp)) { + if (pud_leaf(*pudp)) { need_split = !!(flags & SET_MEMORY_4K); need_split |= !!(addr & ~PUD_MASK); need_split |= !!(addr + PUD_SIZE > next); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9ac66304d776..2c944bafb030 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -470,7 +470,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) return -ENOENT; /* Large PUDs are not supported yet. */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) return -EFAULT; *pmdp = pmd_offset(pud, addr); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index afe5edf2a604..85cddf904cb2 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -329,7 +329,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!add) { if (pud_none(*pud)) continue; - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { pud_clear(pud); @@ -350,7 +350,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) { + } else if (pud_leaf(*pud)) { continue; } ret = modify_pmd_table(pud, addr, next, add, direct, altmap); @@ -599,7 +599,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (WARN_ON_ONCE(pud_large(*pud))) { + } else if (WARN_ON_ONCE(pud_leaf(*pud))) { goto out; } pmd = pmd_offset(pud, addr); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5e067b6a4464..1ca9054d9b97 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr) if (pud_none(*pud)) return false; - if (pud_large(*pud)) + if (pud_leaf(*pud)) return pfn_valid(pud_pfn(*pud)); pmd = pmd_offset(pud, addr); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c15123248c52..5cb5bc4a72c4 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3126,7 +3126,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, if (pud_none(pud) || !pud_present(pud)) goto out; - if (pud_large(pud)) { + if (pud_leaf(pud)) { level = PG_LEVEL_1G; goto out; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 09417f950343..2cc6fa5dc561 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud) || pud_large(*pud)) + if (!pud_present(*pud) || pud_leaf(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -1046,7 +1046,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) if (!pud_present(*pud)) return 0; - if (pud_large(*pud)) + if (pud_leaf(*pud)) return spurious_kernel_fault_check(error_code, (pte_t *) pud); pmd = pmd_offset(pud, address); diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index f50cc210a981..a204a332c71f 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -33,7 +33,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, next = end; /* if this is already a gbpage, this portion is already mapped */ - if (pud_large(*pud)) + if (pud_leaf(*pud)) continue; /* Is using a gbpage allowed? */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2c5490e58f41..7e177856ee4f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, } if (!pud_none(*pud)) { - if (!pud_large(*pud)) { + if (!pud_leaf(*pud)) { pmd = pmd_offset(pud, 0); paddr_last = phys_pmd_init(pmd, paddr, paddr_end, @@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!pud_present(*pud)) continue; - if (pud_large(*pud) && + if (pud_leaf(*pud) && IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { spin_lock(&init_mm.page_table_lock); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index f41d26bc9161..9dddf19a5571 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); - if (!pud_large(*pud)) + if (!pud_leaf(*pud)) kasan_populate_pud(pud, addr, next, nid); } while (pud++, addr = next, addr != end); } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index bca4fea80579..7dd30e16294d 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -145,7 +145,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); } - if (pud_large(*pud)) + if (pud_leaf(*pud)) return NULL; return pud; diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b4037fe08eed..e3a26f2c7781 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, return NULL; *level = PG_LEVEL_1G; - if (pud_large(*pud) || !pud_present(*pud)) + if (pud_leaf(*pud) || !pud_present(*pud)) return (pte_t *)pud; pmd = pmd_offset(pud, address); @@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address) return NULL; pud = pud_offset(p4d, address); - if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) + if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud)) return NULL; return pmd_offset(pud, address); @@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) */ while (end - start >= PUD_SIZE) { - if (pud_large(*pud)) + if (pud_leaf(*pud)) pud_clear(pud); else unmap_pmd_range(pud, start, start + PUD_SIZE); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index d05dd86ceb41..ff690ddc2334 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -777,7 +777,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) */ int pud_clear_huge(pud_t *pud) { - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { pud_clear(pud); return 1; } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 0442e8f479a6..2e69abf4f852 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) pud = pud_offset(p4d, address); /* The user page tables do not use large mappings: */ - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { WARN_ON(1); return NULL; } diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index 277eaf610e0e..5b81d19cd114 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -170,7 +170,7 @@ int relocate_restore_code(void) goto out; } pud = pud_offset(p4d, relocated_restore_code); - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); goto out; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index dde551bbd231..54e0d311dcc9 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) pmd_t *pmd_tbl; int i; - if (pud_large(*pud)) { + if (pud_leaf(*pud)) { pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; xen_free_ro_pages(pa, PUD_SIZE); return; @@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) if (!pud_present(pud)) return 0; pa = pud_val(pud) & PTE_PFN_MASK; - if (pud_large(pud)) + if (pud_leaf(pud)) return pa + (vaddr & ~PUD_MASK); pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) * -- cgit v1.2.3 From e72c7c2b88666903f174a82cd5c4e0598601189f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:49 +0800 Subject: mm/treewide: drop pXd_large() They're not used anymore, drop all of them. Link: https://lkml.kernel.org/r/20240305043750.93762-10-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport (IBM) Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/arm/include/asm/pgtable-2level.h | 1 - arch/arm/include/asm/pgtable-3level.h | 1 - arch/loongarch/kvm/mmu.c | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +--- arch/powerpc/include/asm/pgtable.h | 4 ---- arch/s390/include/asm/pgtable.h | 8 ++++---- arch/sparc/include/asm/pgtable_64.h | 8 ++++---- arch/x86/include/asm/pgtable.h | 19 +++++++------------ arch/x86/kvm/mmu/mmu.c | 2 +- 9 files changed, 18 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index ce543cd9380c..b0a262566eb9 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -213,7 +213,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_pfn(pmd) (__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) -#define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_leaf(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) #define pmd_present(pmd) (pmd_val(pmd)) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 71c3add6417f..4b1d9eb3908a 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -118,7 +118,6 @@ PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) -#define pmd_large(pmd) pmd_sect(pmd) #define pmd_leaf(pmd) pmd_sect(pmd) #define pud_clear(pudp) \ diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 50a6acd7ffe4..a556cff35740 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -723,7 +723,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, /* * Read each entry once. As above, a non-leaf entry can be promoted to * a huge page _during_ this walk. Re-reading the entry could send the - * walk into the weeks, e.g. p*d_large() returns false (sees the old + * walk into the weeks, e.g. p*d_leaf() returns false (sees the old * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 3e99e409774a..df66dce8306f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1437,17 +1437,15 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va } /* - * Like pmd_huge() and pmd_large(), but works regardless of config options + * Like pmd_huge(), but works regardless of config options */ #define pmd_leaf pmd_leaf -#define pmd_large pmd_leaf static inline bool pmd_leaf(pmd_t pmd) { return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } #define pud_leaf pud_leaf -#define pud_large pud_leaf static inline bool pud_leaf(pud_t pud) { return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index e6edf1cdbc5b..239709a2f68e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -101,10 +101,6 @@ void poking_init(void); extern unsigned long ioremap_bot; extern const pgprot_t protection_map[16]; -#ifndef pmd_large -#define pmd_large(pmd) 0 -#endif - /* can we use this in kvm */ unsigned long vmalloc_to_phys(void *vmalloc_addr); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a5f16a244a64..9e08af5b9247 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -705,16 +705,16 @@ static inline int pud_none(pud_t pud) return pud_val(pud) == _REGION3_ENTRY_EMPTY; } -#define pud_leaf pud_large -static inline int pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline int pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } -#define pmd_leaf pmd_large -static inline int pmd_large(pmd_t pmd) +#define pmd_leaf pmd_leaf +static inline int pmd_leaf(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 652af9d63fa2..6ff0a28d5fd1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -680,8 +680,8 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } -#define pmd_leaf pmd_large -static inline unsigned long pmd_large(pmd_t pmd) +#define pmd_leaf pmd_leaf +static inline unsigned long pmd_leaf(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); @@ -867,8 +867,8 @@ static inline pmd_t *pud_pgtable(pud_t pud) /* only used by the stubbed out hugetlb gup code, should never be called */ #define p4d_page(p4d) NULL -#define pud_leaf pud_large -static inline unsigned long pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline unsigned long pud_leaf(pud_t pud) { pte_t pte = __pte(pud_val(pud)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9db7a38a0e9f..cfc84c55d0e6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -251,8 +251,8 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } -#define p4d_leaf p4d_large -static inline int p4d_large(p4d_t p4d) +#define p4d_leaf p4d_leaf +static inline int p4d_leaf(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; @@ -260,14 +260,14 @@ static inline int p4d_large(p4d_t p4d) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) -#define pmd_leaf pmd_large -static inline int pmd_large(pmd_t pte) +#define pmd_leaf pmd_leaf +static inline int pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; @@ -1085,8 +1085,8 @@ static inline pmd_t *pud_pgtable(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -#define pud_leaf pud_large -static inline int pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline int pud_leaf(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); @@ -1096,11 +1096,6 @@ static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } -#else -static inline int pud_large(pud_t pud) -{ - return 0; -} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5cb5bc4a72c4..58f5e6b637b4 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3110,7 +3110,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, /* * Read each entry once. As above, a non-leaf entry can be promoted to * a huge page _during_ this walk. Re-reading the entry could send the - * walk into the weeks, e.g. p*d_large() returns false (sees the old + * walk into the weeks, e.g. p*d_leaf() returns false (sees the old * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ -- cgit v1.2.3 From c05995b7ec2a73bf813a8944978e175f8e4ec3ac Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 Mar 2024 12:37:50 +0800 Subject: mm/treewide: align up pXd_leaf() retval across archs Even if pXd_leaf() API is defined globally, it's not clear on the retval, and there are three types used (bool, int, unsigned log). Always return a boolean for pXd_leaf() APIs. Link: https://lkml.kernel.org/r/20240305043750.93762-11-peterx@redhat.com Signed-off-by: Peter Xu Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport (IBM) Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: "Aneesh Kumar K.V" Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dave Hansen Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Muchun Song Cc: "Naveen N. Rao" Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Vincenzo Frascino Cc: Yang Shi Signed-off-by: Andrew Morton --- arch/riscv/include/asm/pgtable-64.h | 2 +- arch/riscv/include/asm/pgtable.h | 2 +- arch/s390/include/asm/pgtable.h | 4 ++-- arch/sparc/include/asm/pgtable_64.h | 4 ++-- arch/x86/include/asm/pgtable.h | 8 ++++---- include/linux/pgtable.h | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index b42017d76924..2c7e1661db01 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -190,7 +190,7 @@ static inline int pud_bad(pud_t pud) } #define pud_leaf pud_leaf -static inline int pud_leaf(pud_t pud) +static inline bool pud_leaf(pud_t pud) { return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF); } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index add5cd30ab34..6839520dbcb1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -241,7 +241,7 @@ static inline int pmd_bad(pmd_t pmd) } #define pmd_leaf pmd_leaf -static inline int pmd_leaf(pmd_t pmd) +static inline bool pmd_leaf(pmd_t pmd) { return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9e08af5b9247..60950e7a25f5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -706,7 +706,7 @@ static inline int pud_none(pud_t pud) } #define pud_leaf pud_leaf -static inline int pud_leaf(pud_t pud) +static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; @@ -714,7 +714,7 @@ static inline int pud_leaf(pud_t pud) } #define pmd_leaf pmd_leaf -static inline int pmd_leaf(pmd_t pmd) +static inline bool pmd_leaf(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 6ff0a28d5fd1..4d1bafaba942 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -681,7 +681,7 @@ static inline unsigned long pte_special(pte_t pte) } #define pmd_leaf pmd_leaf -static inline unsigned long pmd_leaf(pmd_t pmd) +static inline bool pmd_leaf(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); @@ -868,7 +868,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define p4d_page(p4d) NULL #define pud_leaf pud_leaf -static inline unsigned long pud_leaf(pud_t pud) +static inline bool pud_leaf(pud_t pud) { pte_t pte = __pte(pud_val(pud)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index cfc84c55d0e6..7621a5acb13e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -252,7 +252,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd) } #define p4d_leaf p4d_leaf -static inline int p4d_leaf(p4d_t p4d) +static inline bool p4d_leaf(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; @@ -261,7 +261,7 @@ static inline int p4d_leaf(p4d_t p4d) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_leaf -static inline int pmd_leaf(pmd_t pte) +static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } @@ -1086,7 +1086,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define pud_page(pud) pfn_to_page(pud_pfn(pud)) #define pud_leaf pud_leaf -static inline int pud_leaf(pud_t pud) +static inline bool pud_leaf(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); @@ -1413,7 +1413,7 @@ static inline bool pgdp_maps_userspace(void *__ptr) } #define pgd_leaf pgd_leaf -static inline int pgd_leaf(pgd_t pgd) { return 0; } +static inline bool pgd_leaf(pgd_t pgd) { return false; } #ifdef CONFIG_PAGE_TABLE_ISOLATION /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a36cf4e124b0..85fc7554cd52 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1777,16 +1777,16 @@ typedef unsigned int pgtbl_mod_mask; * Only meaningful when called on a valid entry. */ #ifndef pgd_leaf -#define pgd_leaf(x) 0 +#define pgd_leaf(x) false #endif #ifndef p4d_leaf -#define p4d_leaf(x) 0 +#define p4d_leaf(x) false #endif #ifndef pud_leaf -#define pud_leaf(x) 0 +#define pud_leaf(x) false #endif #ifndef pmd_leaf -#define pmd_leaf(x) 0 +#define pmd_leaf(x) false #endif #ifndef pgd_leaf_size -- cgit v1.2.3