From bb43b14b576228c580bdc7e1aeeded54d540b5ef Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 24 Mar 2022 18:09:49 -0700 Subject: mm: delete __ClearPageWaiters() The PG_waiters bit is not included in PAGE_FLAGS_CHECK_AT_FREE, and vmscan.c's free_unref_page_list() callers rely on that not to generate bad_page() alerts. So __page_cache_release(), put_pages_list() and release_pages() (and presumably copy-and-pasted free_zone_device_page()) are redundant and misleading to make a special point of clearing it (as the "__" implies, it could only safely be used on the freeing path). Delete __ClearPageWaiters(). Remark on this in one of the "possible" comments in folio_wake_bit(), and delete the superfluous comments. Link: https://lkml.kernel.org/r/3eafa969-5b1a-accf-88fe-318784c791a@google.com Signed-off-by: Hugh Dickins Tested-by: Yu Zhao Reviewed-by: Yang Shi Reviewed-by: David Hildenbrand Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Yu Zhao Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 88fe1d759cdd..9d8eeaa67d05 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -481,7 +481,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) -- cgit v1.2.3 From 7c13c163e036c646b77753deacfe2f5478b654bc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:10 -0700 Subject: kasan, page_alloc: merge kasan_free_pages into free_pages_prepare Currently, the code responsible for initializing and poisoning memory in free_pages_prepare() is scattered across two locations: kasan_free_pages() for HW_TAGS KASAN and free_pages_prepare() itself. This is confusing. This and a few following patches combine the code from these two locations. Along the way, these patches also simplify the performed checks to make them easier to follow. Replaces the only caller of kasan_free_pages() with its implementation. As kasan_has_integrated_init() is only true when CONFIG_KASAN_HW_TAGS is enabled, moving the code does no functional changes. This patch is not useful by itself but makes the simplifications in the following patches easier to follow. Link: https://lkml.kernel.org/r/303498d15840bb71905852955c6e2390ecc87139.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 8 -------- mm/kasan/common.c | 2 +- mm/kasan/hw_tags.c | 11 ----------- mm/page_alloc.c | 6 ++++-- 4 files changed, 5 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b6a93261c92a..dd2161af84e9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -85,7 +85,6 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); -void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ @@ -96,13 +95,6 @@ static __always_inline void kasan_alloc_pages(struct page *page, BUILD_BUG(); } -static __always_inline void kasan_free_pages(struct page *page, - unsigned int order) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 92196562687b..a0082fad48b1 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -387,7 +387,7 @@ static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip) } /* - * The object will be poisoned by kasan_free_pages() or + * The object will be poisoned by kasan_poison_pages() or * kasan_slab_free_mempool(). */ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 7355cb534e4f..0b8225add2e4 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -213,17 +213,6 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) } } -void kasan_free_pages(struct page *page, unsigned int order) -{ - /* - * This condition should match the one in free_pages_prepare() in - * page_alloc.c. - */ - bool init = want_init_on_free(); - - kasan_poison_pages(page, order, init); -} - #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) void kasan_enable_tagging_sync(void) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0721ff0c90be..d16df446d2ca 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1364,15 +1364,17 @@ static __always_inline bool free_pages_prepare(struct page *page, /* * As memory initialization might be integrated into KASAN, - * kasan_free_pages and kernel_init_free_pages must be + * KASAN poisoning and memory initialization code must be * kept together to avoid discrepancies in behavior. * * With hardware tag-based KASAN, memory tags must be set before the * page becomes unavailable via debug_pagealloc or arch_free_page. */ if (kasan_has_integrated_init()) { + bool init = want_init_on_free(); + if (!skip_kasan_poison) - kasan_free_pages(page, order); + kasan_poison_pages(page, order, init); } else { bool init = want_init_on_free(); -- cgit v1.2.3 From c82ce3195fd17e57a370e4dc8f36c195b8807b95 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:22 -0700 Subject: mm: clarify __GFP_ZEROTAGS comment __GFP_ZEROTAGS is intended as an optimization: if memory is zeroed during allocation, it's possible to set memory tags at the same time with little performance impact. Clarify this intention of __GFP_ZEROTAGS in the comment. Link: https://lkml.kernel.org/r/cdffde013973c5634a447513e10ec0d21e8eee29.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 20f6fbe12993..72dc4ca75cf3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -232,8 +232,10 @@ struct vm_area_struct; * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if - * __GFP_ZERO is set. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc). This flag is + * intended for optimization: setting memory tags at the same time as zeroing + * memory has minimal additional performace impact. * * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned * on deallocation. Typically used for userspace pages. Currently only has an -- cgit v1.2.3 From b42090ae6f3aa07b0a39403545d688489548a6a8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:31 -0700 Subject: kasan, page_alloc: merge kasan_alloc_pages into post_alloc_hook Currently, the code responsible for initializing and poisoning memory in post_alloc_hook() is scattered across two locations: kasan_alloc_pages() hook for HW_TAGS KASAN and post_alloc_hook() itself. This is confusing. This and a few following patches combine the code from these two locations. Along the way, these patches do a step-by-step restructure the many performed checks to make them easier to follow. Replace the only caller of kasan_alloc_pages() with its implementation. As kasan_has_integrated_init() is only true when CONFIG_KASAN_HW_TAGS is enabled, moving the code does no functional changes. Also move init and init_tags variables definitions out of kasan_has_integrated_init() clause in post_alloc_hook(), as they have the same values regardless of what the if condition evaluates to. This patch is not useful by itself but makes the simplifications in the following patches easier to follow. Link: https://lkml.kernel.org/r/5ac7e0b30f5cbb177ec363ddd7878a3141289592.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 9 --------- mm/kasan/common.c | 2 +- mm/kasan/hw_tags.c | 22 ---------------------- mm/page_alloc.c | 20 +++++++++++++++----- 4 files changed, 16 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd2161af84e9..4ed94616c8f0 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -84,17 +84,8 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); - #else /* CONFIG_KASAN_HW_TAGS */ -static __always_inline void kasan_alloc_pages(struct page *page, - unsigned int order, gfp_t flags) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} - #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index a0082fad48b1..d9079ec11f31 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -538,7 +538,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, return NULL; /* - * The object has already been unpoisoned by kasan_alloc_pages() for + * The object has already been unpoisoned by kasan_unpoison_pages() for * alloc_pages() or by kasan_krealloc() for krealloc(). */ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index c643740b8599..76cf2b6229c7 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -192,28 +192,6 @@ void __init kasan_init_hw_tags(void) kasan_stack_collection_enabled() ? "on" : "off"); } -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) -{ - /* - * This condition should match the one in post_alloc_hook() in - * page_alloc.c. - */ - bool init = !want_init_on_free() && want_init_on_alloc(flags); - bool init_tags = init && (flags & __GFP_ZEROTAGS); - - if (flags & __GFP_SKIP_KASAN_POISON) - SetPageSkipKASanPoison(page); - - if (init_tags) { - int i; - - for (i = 0; i != 1 << order; ++i) - tag_clear_highpage(page + i); - } else { - kasan_unpoison_pages(page, order, init); - } -} - #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) void kasan_enable_tagging_sync(void) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d2d6e48d2d83..db82facf14e7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2346,6 +2346,9 @@ static inline bool check_new_pcp(struct page *page, unsigned int order) inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { + bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags); + bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + set_page_private(page, 0); set_page_refcounted(page); @@ -2361,15 +2364,22 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* * As memory initialization might be integrated into KASAN, - * kasan_alloc_pages and kernel_init_free_pages must be + * KASAN unpoisoning and memory initializion code must be * kept together to avoid discrepancies in behavior. */ if (kasan_has_integrated_init()) { - kasan_alloc_pages(page, order, gfp_flags); - } else { - bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags); - bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + if (gfp_flags & __GFP_SKIP_KASAN_POISON) + SetPageSkipKASanPoison(page); + + if (init_tags) { + int i; + for (i = 0; i != 1 << order; ++i) + tag_clear_highpage(page + i); + } else { + kasan_unpoison_pages(page, order, init); + } + } else { kasan_unpoison_pages(page, order, init); if (init_tags) { -- cgit v1.2.3 From 63840de296472f3914bb933b11ba2b764590755e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:52 -0700 Subject: kasan, x86, arm64, s390: rename functions for modules shadow Rename kasan_free_shadow to kasan_free_module_shadow and kasan_module_alloc to kasan_alloc_module_shadow. These functions are used to allocate/free shadow memory for kernel modules when KASAN_VMALLOC is not enabled. The new names better reflect their purpose. Also reword the comment next to their declaration to improve clarity. Link: https://lkml.kernel.org/r/36db32bde765d5d0b856f77d2d806e838513fe84.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Catalin Marinas Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/module.c | 2 +- arch/s390/kernel/module.c | 2 +- arch/x86/kernel/module.c | 2 +- include/linux/kasan.h | 14 +++++++------- mm/kasan/shadow.c | 4 ++-- mm/vmalloc.c | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 309a27553c87..d3a1fa818348 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -58,7 +58,7 @@ void *module_alloc(unsigned long size) PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b032e556eeb7..a7aefc278909 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -45,7 +45,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 96d7c27b7093..504ea65987e8 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -78,7 +78,7 @@ void *module_alloc(unsigned long size) MODULES_END, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4ed94616c8f0..b450c7653137 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -433,17 +433,17 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, !defined(CONFIG_KASAN_VMALLOC) /* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. + * These functions allocate and free shadow memory for kernel modules. + * They are only required when KASAN_VMALLOC is not supported, as otherwise + * shadow memory is allocated by the generic vmalloc handlers. */ -int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); -void kasan_free_shadow(const struct vm_struct *vm); +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask); +void kasan_free_module_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; } +static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 94136f84b449..e5c4393eb861 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -498,7 +498,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, #else /* CONFIG_KASAN_VMALLOC */ -int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { void *ret; size_t scaled_size; @@ -534,7 +534,7 @@ int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) return -ENOMEM; } -void kasan_free_shadow(const struct vm_struct *vm) +void kasan_free_module_shadow(const struct vm_struct *vm) { if (vm->flags & VM_KASAN) vfree(kasan_mem_to_shadow(vm->addr)); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 99e0f3e8d1a5..feecd614bb77 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2547,7 +2547,7 @@ struct vm_struct *remove_vm_area(const void *addr) va->vm = NULL; spin_unlock(&vmap_area_lock); - kasan_free_shadow(vm); + kasan_free_module_shadow(vm); free_unmap_vmap_area(va); return vm; -- cgit v1.2.3 From 0b7ccc70ee1d5b499d1626c9d28f729507b1c036 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:55 -0700 Subject: kasan, vmalloc: drop outdated VM_KASAN comment The comment about VM_KASAN in include/linux/vmalloc.c is outdated. VM_KASAN is currently only used to mark vm_areas allocated for kernel modules when CONFIG_KASAN_VMALLOC is disabled. Drop the comment. Link: https://lkml.kernel.org/r/780395afea83a147b3b5acc36cf2e38f7f8479f9.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 5a0c3b556848..2ca95c7db463 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,17 +35,6 @@ struct notifier_block; /* in notifier.h */ #define VM_DEFER_KMEMLEAK 0 #endif -/* - * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. - * - * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after - * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poison shadow on free if it was never allocated. - * - * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to - * determine which allocations need the module shadow freed. - */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3 From 5bd9bae22a455321327982d0b595a7e76d425fd0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:58 -0700 Subject: kasan: reorder vmalloc hooks Group functions that [de]populate shadow memory for vmalloc. Group functions that [un]poison memory for vmalloc. This patch does no functional changes but prepares KASAN code for adding vmalloc support to HW_TAGS KASAN. Link: https://lkml.kernel.org/r/aeef49eb249c206c4c9acce2437728068da74c28.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 20 +++++++++----------- mm/kasan/shadow.c | 43 ++++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b450c7653137..5f9ed1372eef 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -397,34 +397,32 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); +void kasan_unpoison_vmalloc(const void *start, unsigned long size); +void kasan_poison_vmalloc(const void *start, unsigned long size); #else /* CONFIG_KASAN_VMALLOC */ +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } - -static inline void kasan_poison_vmalloc(const void *start, unsigned long size) -{ } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) {} + unsigned long free_region_end) { } -static inline void kasan_populate_early_vm_area_shadow(void *start, - unsigned long size) +static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ } +static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } #endif /* CONFIG_KASAN_VMALLOC */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index e5c4393eb861..bf7ab62fbfb9 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -345,27 +345,6 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) return 0; } -/* - * Poison the shadow for a vmalloc region. Called as part of the - * freeing process at the time the region is freed. - */ -void kasan_poison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - size = round_up(size, KASAN_GRANULE_SIZE); - kasan_poison(start, size, KASAN_VMALLOC_INVALID, false); -} - -void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - kasan_unpoison(start, size, false); -} - static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { @@ -496,6 +475,28 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, } } + +void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + kasan_unpoison(start, size, false); +} + +/* + * Poison the shadow for a vmalloc region. Called as part of the + * freeing process at the time the region is freed. + */ +void kasan_poison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + size = round_up(size, KASAN_GRANULE_SIZE); + kasan_poison(start, size, KASAN_VMALLOC_INVALID, false); +} + #else /* CONFIG_KASAN_VMALLOC */ int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) -- cgit v1.2.3 From 579fb0ac085be2015529a5f7bd1061899a6374de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:01 -0700 Subject: kasan: add wrappers for vmalloc hooks Add wrappers around functions that [un]poison memory for vmalloc allocations. These functions will be used by HW_TAGS KASAN and therefore need to be disabled when kasan=off command line argument is provided. This patch does no functional changes for software KASAN modes. Link: https://lkml.kernel.org/r/3b8728eac438c55389fb0f9a8a2145d71dd77487.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 17 +++++++++++++++-- mm/kasan/shadow.c | 5 ++--- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5f9ed1372eef..a6d6fdefb278 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -403,8 +403,21 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); +void __kasan_unpoison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_unpoison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_unpoison_vmalloc(start, size); +} + +void __kasan_poison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_poison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_poison_vmalloc(start, size); +} #else /* CONFIG_KASAN_VMALLOC */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index bf7ab62fbfb9..39d0b32ebf70 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -475,8 +475,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, } } - -void kasan_unpoison_vmalloc(const void *start, unsigned long size) +void __kasan_unpoison_vmalloc(const void *start, unsigned long size) { if (!is_vmalloc_or_module_addr(start)) return; @@ -488,7 +487,7 @@ void kasan_unpoison_vmalloc(const void *start, unsigned long size) * Poison the shadow for a vmalloc region. Called as part of the * freeing process at the time the region is freed. */ -void kasan_poison_vmalloc(const void *start, unsigned long size) +void __kasan_poison_vmalloc(const void *start, unsigned long size) { if (!is_vmalloc_or_module_addr(start)) return; -- cgit v1.2.3 From 1d96320f8d5320423737da61e6e6937f6b475b5c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:13 -0700 Subject: kasan, vmalloc: add vmalloc tagging for SW_TAGS Add vmalloc tagging support to SW_TAGS KASAN. - __kasan_unpoison_vmalloc() now assigns a random pointer tag, poisons the virtual mapping accordingly, and embeds the tag into the returned pointer. - __get_vm_area_node() (used by vmalloc() and vmap()) and pcpu_get_vm_areas() save the tagged pointer into vm_struct->addr (note: not into vmap_area->addr). This requires putting kasan_unpoison_vmalloc() after setup_vmalloc_vm[_locked](); otherwise the latter will overwrite the tagged pointer. The tagged pointer then is naturally propagateed to vmalloc() and vmap(). - vm_map_ram() returns the tagged pointer directly. As a result of this change, vm_struct->addr is now tagged. Enabling KASAN_VMALLOC with SW_TAGS is not yet allowed. Link: https://lkml.kernel.org/r/4a78f3c064ce905e9070c29733aca1dd254a74f1.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 16 ++++++++++------ mm/kasan/shadow.c | 6 ++++-- mm/vmalloc.c | 14 ++++++++------ 3 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index a6d6fdefb278..d3efb6cbd0f5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -403,12 +403,13 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void __kasan_unpoison_vmalloc(const void *start, unsigned long size); -static __always_inline void kasan_unpoison_vmalloc(const void *start, - unsigned long size) +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size); +static __always_inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size) { if (kasan_enabled()) - __kasan_unpoison_vmalloc(start, size); + return __kasan_unpoison_vmalloc(start, size); + return (void *)start; } void __kasan_poison_vmalloc(const void *start, unsigned long size); @@ -433,8 +434,11 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_start, unsigned long free_region_end) { } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } +static inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size) +{ + return (void *)start; +} static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 39d0b32ebf70..5a866f6663fc 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -475,12 +475,14 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, } } -void __kasan_unpoison_vmalloc(const void *start, unsigned long size) +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size) { if (!is_vmalloc_or_module_addr(start)) - return; + return (void *)start; + start = set_tag(start, kasan_random_tag()); kasan_unpoison(start, size, false); + return (void *)start; } /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab0a0002bfca..1ab1f1b2f5b7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2231,7 +2231,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node) mem = (void *)addr; } - kasan_unpoison_vmalloc(mem, size); + mem = kasan_unpoison_vmalloc(mem, size); if (vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, PAGE_SHIFT) < 0) { @@ -2464,10 +2464,10 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - kasan_unpoison_vmalloc((void *)va->va_start, requested_size); - setup_vmalloc_vm(area, va, flags, caller); + area->addr = kasan_unpoison_vmalloc(area->addr, requested_size); + return area; } @@ -3815,9 +3815,6 @@ retry: for (area = 0; area < nr_vms; area++) { if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) goto err_free_shadow; - - kasan_unpoison_vmalloc((void *)vas[area]->va_start, - sizes[area]); } /* insert all vm's */ @@ -3830,6 +3827,11 @@ retry: } spin_unlock(&vmap_area_lock); + /* mark allocated areas as accessible */ + for (area = 0; area < nr_vms; area++) + vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, + vms[area]->size); + kfree(vas); return vms; -- cgit v1.2.3 From 01d92c7f358ce892279ca830cf6ccf2862a17d1c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:16 -0700 Subject: kasan, vmalloc, arm64: mark vmalloc mappings as pgprot_tagged HW_TAGS KASAN relies on ARM Memory Tagging Extension (MTE). With MTE, a memory region must be mapped as MT_NORMAL_TAGGED to allow setting memory tags via MTE-specific instructions. Add proper protection bits to vmalloc() allocations. These allocations are always backed by page_alloc pages, so the tags will actually be getting set on the corresponding physical memory. Link: https://lkml.kernel.org/r/983fc33542db2f6b1e77b34ca23448d4640bbb9e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/vmalloc.h | 6 ++++++ include/linux/vmalloc.h | 7 +++++++ mm/vmalloc.c | 9 +++++++++ 3 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index b9185503feae..38fafffe699f 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -25,4 +25,10 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) #endif +#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return pgprot_tagged(prot); +} + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2ca95c7db463..3b1df7da402d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size) } #endif +#ifndef arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return prot; +} +#endif + /* * Highlevel APIs for driver use */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1ab1f1b2f5b7..8530d86c3e58 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3128,6 +3128,15 @@ again: goto fail; } + /* + * Modify protection bits to allow tagging. + * This must be done before mapping by __vmalloc_area_node(). + */ + if (kasan_hw_tags_enabled() && + pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) + prot = arch_vmap_pgprot_tagged(prot); + + /* Allocate physical pages and map them into vmalloc space. */ addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node); if (!addr) goto fail; -- cgit v1.2.3 From f49d9c5bb15cc5c470097078ec1f26ff605ae1a2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:23 -0700 Subject: kasan, mm: only define ___GFP_SKIP_KASAN_POISON with HW_TAGS Only define the ___GFP_SKIP_KASAN_POISON flag when CONFIG_KASAN_HW_TAGS is enabled. This patch it not useful by itself, but it prepares the code for additions of new KASAN-specific GFP patches. Link: https://lkml.kernel.org/r/44e5738a584c11801b2b8f1231898918efc8634a.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 +++++++- include/trace/events/mmflags.h | 12 +++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 72dc4ca75cf3..75abf8d5bc9b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -54,7 +54,11 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u +#ifdef CONFIG_KASAN_HW_TAGS #define ___GFP_SKIP_KASAN_POISON 0x1000000u +#else +#define ___GFP_SKIP_KASAN_POISON 0 +#endif #ifdef CONFIG_LOCKDEP #define ___GFP_NOLOCKDEP 0x2000000u #else @@ -251,7 +255,9 @@ struct vm_area_struct; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (24 + \ + IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 116ed4d5d0f8..cb4520374e2c 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -49,12 +49,18 @@ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ - {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \ - {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\ + {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \ + +#ifdef CONFIG_KASAN_HW_TAGS +#define __def_gfpflag_names_kasan \ + , {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"} +#else +#define __def_gfpflag_names_kasan +#endif #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ - __def_gfpflag_names \ + __def_gfpflag_names __def_gfpflag_names_kasan \ ) : "none" #ifdef CONFIG_MMU -- cgit v1.2.3 From 53ae233c30a623ff44ff2f83854e92530c5d9fc2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:26 -0700 Subject: kasan, page_alloc: allow skipping unpoisoning for HW_TAGS Add a new GFP flag __GFP_SKIP_KASAN_UNPOISON that allows skipping KASAN poisoning for page_alloc allocations. The flag is only effective with HW_TAGS KASAN. This flag will be used by vmalloc code for page_alloc allocations backing vmalloc() mappings in a following patch. The reason to skip KASAN poisoning for these pages in page_alloc is because vmalloc code will be poisoning them instead. Also reword the comment for __GFP_SKIP_KASAN_POISON. Link: https://lkml.kernel.org/r/35c97d77a704f6ff971dd3bfe4be95855744108e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 21 +++++++++++++-------- include/trace/events/mmflags.h | 5 +++-- mm/page_alloc.c | 31 ++++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 75abf8d5bc9b..688a39fde43e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -55,12 +55,14 @@ struct vm_area_struct; #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_KASAN_POISON 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x1000000u +#define ___GFP_SKIP_KASAN_POISON 0x2000000u #else +#define ___GFP_SKIP_KASAN_UNPOISON 0 #define ___GFP_SKIP_KASAN_POISON 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x2000000u +#define ___GFP_NOLOCKDEP 0x4000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -241,22 +243,25 @@ struct vm_area_struct; * intended for optimization: setting memory tags at the same time as zeroing * memory has minimal additional performace impact. * - * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned - * on deallocation. Typically used for userspace pages. Currently only has an - * effect in HW tags mode. + * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. + * Only effective in HW_TAGS mode. + * + * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. + * Typically, used for userspace pages. Only effective in HW_TAGS mode. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) +#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (24 + \ - IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ +#define __GFP_BITS_SHIFT (24 + \ + 2 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index cb4520374e2c..134c45e62d91 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -52,8 +52,9 @@ {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \ #ifdef CONFIG_KASAN_HW_TAGS -#define __def_gfpflag_names_kasan \ - , {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"} +#define __def_gfpflag_names_kasan , \ + {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \ + {(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"} #else #define __def_gfpflag_names_kasan #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 350bb27dc4bb..59a9408e1e7d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2343,6 +2343,26 @@ static inline bool check_new_pcp(struct page *page, unsigned int order) } #endif /* CONFIG_DEBUG_VM */ +static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) +{ + /* Don't skip if a software KASAN mode is enabled. */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + return false; + + /* Skip, if hardware tag-based KASAN is not enabled. */ + if (!kasan_hw_tags_enabled()) + return true; + + /* + * With hardware tag-based KASAN enabled, skip if either: + * + * 1. Memory tags have already been cleared via tag_clear_highpage(). + * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON. + */ + return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); +} + inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { @@ -2382,15 +2402,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Note that memory is already initialized by the loop above. */ init = false; } - /* - * If either a software KASAN mode is enabled, or, - * in the case of hardware tag-based KASAN, - * if memory tags have not been cleared via tag_clear_highpage(). - */ - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS) || - kasan_hw_tags_enabled() && !init_tags) { - /* Mark shadow memory or set memory tags. */ + if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) { + /* Unpoison shadow memory or set memory tags. */ kasan_unpoison_pages(page, order, init); /* Note that memory is already initialized by KASAN. */ -- cgit v1.2.3 From 9353ffa6e9e90d2b6348209cf2b95a8ffee18711 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:29 -0700 Subject: kasan, page_alloc: allow skipping memory init for HW_TAGS Add a new GFP flag __GFP_SKIP_ZERO that allows to skip memory initialization. The flag is only effective with HW_TAGS KASAN. This flag will be used by vmalloc code for page_alloc allocations backing vmalloc() mappings in a following patch. The reason to skip memory initialization for these pages in page_alloc is because vmalloc code will be initializing them instead. With the current implementation, when __GFP_SKIP_ZERO is provided, __GFP_ZEROTAGS is ignored. This doesn't matter, as these two flags are never provided at the same time. However, if this is changed in the future, this particular implementation detail can be changed as well. Link: https://lkml.kernel.org/r/0d53efeff345de7d708e0baa0d8829167772521e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 18 +++++++++++------- include/trace/events/mmflags.h | 1 + mm/page_alloc.c | 13 ++++++++++++- 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 688a39fde43e..0fa17fb85de5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -55,14 +55,16 @@ struct vm_area_struct; #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_KASAN_UNPOISON 0x1000000u -#define ___GFP_SKIP_KASAN_POISON 0x2000000u +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u +#define ___GFP_SKIP_KASAN_POISON 0x4000000u #else +#define ___GFP_SKIP_ZERO 0 #define ___GFP_SKIP_KASAN_UNPOISON 0 #define ___GFP_SKIP_KASAN_POISON 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP 0x8000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -239,9 +241,10 @@ struct vm_area_struct; * %__GFP_ZERO returns a zeroed page on success. * * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc). This flag is - * intended for optimization: setting memory tags at the same time as zeroing - * memory has minimal additional performace impact. + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performace impact. * * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. * Only effective in HW_TAGS mode. @@ -253,6 +256,7 @@ struct vm_area_struct; #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) #define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) #define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) @@ -261,7 +265,7 @@ struct vm_area_struct; /* Room for N __GFP_FOO bits */ #define __GFP_BITS_SHIFT (24 + \ - 2 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ + 3 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \ IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 134c45e62d91..6532119a6bf1 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -53,6 +53,7 @@ #ifdef CONFIG_KASAN_HW_TAGS #define __def_gfpflag_names_kasan , \ + {(unsigned long)__GFP_SKIP_ZERO, "__GFP_SKIP_ZERO"}, \ {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \ {(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"} #else diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59a9408e1e7d..bdc8f60ae462 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2363,10 +2363,21 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); } +static inline bool should_skip_init(gfp_t flags) +{ + /* Don't skip, if hardware tag-based KASAN is not enabled. */ + if (!kasan_hw_tags_enabled()) + return false; + + /* For hardware tag-based KASAN, skip if requested. */ + return (flags & __GFP_SKIP_ZERO); +} + inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { - bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags); + bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && + !should_skip_init(gfp_flags); bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); set_page_private(page, 0); -- cgit v1.2.3 From 23689e91fb22c15b84ac6c22ad9942039792f3af Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:32 -0700 Subject: kasan, vmalloc: add vmalloc tagging for HW_TAGS Add vmalloc tagging support to HW_TAGS KASAN. The key difference between HW_TAGS and the other two KASAN modes when it comes to vmalloc: HW_TAGS KASAN can only assign tags to physical memory. The other two modes have shadow memory covering every mapped virtual memory region. Make __kasan_unpoison_vmalloc() for HW_TAGS KASAN: - Skip non-VM_ALLOC mappings as HW_TAGS KASAN can only tag a single mapping of normal physical memory; see the comment in the function. - Generate a random tag, tag the returned pointer and the allocation, and initialize the allocation at the same time. - Propagate the tag into the page stucts to allow accesses through page_address(vmalloc_to_page()). The rest of vmalloc-related KASAN hooks are not needed: - The shadow-related ones are fully skipped. - __kasan_poison_vmalloc() is kept as a no-op with a comment. Poisoning and zeroing of physical pages that are backing vmalloc() allocations are skipped via __GFP_SKIP_KASAN_UNPOISON and __GFP_SKIP_ZERO: __kasan_unpoison_vmalloc() does that instead. Enabling CONFIG_KASAN_VMALLOC with HW_TAGS is not yet allowed. Link: https://lkml.kernel.org/r/d19b2e9e59a9abc59d05b72dea8429dcaea739c6.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 36 +++++++++++++++++--- kernel/scs.c | 4 +-- mm/kasan/hw_tags.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/shadow.c | 10 +++++- mm/vmalloc.c | 51 ++++++++++++++++++++++------ 5 files changed, 175 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d3efb6cbd0f5..65fc5285bb59 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -26,6 +26,12 @@ struct kunit_kasan_expectation { #endif +typedef unsigned int __bitwise kasan_vmalloc_flags_t; + +#define KASAN_VMALLOC_NONE 0x00u +#define KASAN_VMALLOC_INIT 0x01u +#define KASAN_VMALLOC_VM_ALLOC 0x02u + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include @@ -397,18 +403,39 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -void *__kasan_unpoison_vmalloc(const void *start, unsigned long size); +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ } +static inline int kasan_populate_vmalloc(unsigned long start, + unsigned long size) +{ + return 0; +} +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) { } + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags); static __always_inline void *kasan_unpoison_vmalloc(const void *start, - unsigned long size) + unsigned long size, + kasan_vmalloc_flags_t flags) { if (kasan_enabled()) - return __kasan_unpoison_vmalloc(start, size); + return __kasan_unpoison_vmalloc(start, size, flags); return (void *)start; } @@ -435,7 +462,8 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_end) { } static inline void *kasan_unpoison_vmalloc(const void *start, - unsigned long size) + unsigned long size, + kasan_vmalloc_flags_t flags) { return (void *)start; } diff --git a/kernel/scs.c b/kernel/scs.c index 579841be8864..b83bc9251f99 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -32,7 +32,7 @@ static void *__scs_alloc(int node) for (i = 0; i < NR_CACHED_SCS; i++) { s = this_cpu_xchg(scs_cache[i], NULL); if (s) { - kasan_unpoison_vmalloc(s, SCS_SIZE); + kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_NONE); memset(s, 0, SCS_SIZE); return s; } @@ -78,7 +78,7 @@ void scs_free(void *s) if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) return; - kasan_unpoison_vmalloc(s, SCS_SIZE); + kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_NONE); vfree_atomic(s); } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 76cf2b6229c7..21104fd51872 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -192,6 +192,98 @@ void __init kasan_init_hw_tags(void) kasan_stack_collection_enabled() ? "on" : "off"); } +#ifdef CONFIG_KASAN_VMALLOC + +static void unpoison_vmalloc_pages(const void *addr, u8 tag) +{ + struct vm_struct *area; + int i; + + /* + * As hardware tag-based KASAN only tags VM_ALLOC vmalloc allocations + * (see the comment in __kasan_unpoison_vmalloc), all of the pages + * should belong to a single area. + */ + area = find_vm_area((void *)addr); + if (WARN_ON(!area)) + return; + + for (i = 0; i < area->nr_pages; i++) { + struct page *page = area->pages[i]; + + page_kasan_tag_set(page, tag); + } +} + +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags) +{ + u8 tag; + unsigned long redzone_start, redzone_size; + + if (!is_vmalloc_or_module_addr(start)) + return (void *)start; + + /* + * Skip unpoisoning and assigning a pointer tag for non-VM_ALLOC + * mappings as: + * + * 1. Unlike the software KASAN modes, hardware tag-based KASAN only + * supports tagging physical memory. Therefore, it can only tag a + * single mapping of normal physical pages. + * 2. Hardware tag-based KASAN can only tag memory mapped with special + * mapping protection bits, see arch_vmalloc_pgprot_modify(). + * As non-VM_ALLOC mappings can be mapped outside of vmalloc code, + * providing these bits would require tracking all non-VM_ALLOC + * mappers. + * + * Thus, for VM_ALLOC mappings, hardware tag-based KASAN only tags + * the first virtual mapping, which is created by vmalloc(). + * Tagging the page_alloc memory backing that vmalloc() allocation is + * skipped, see ___GFP_SKIP_KASAN_UNPOISON. + * + * For non-VM_ALLOC allocations, page_alloc memory is tagged as usual. + */ + if (!(flags & KASAN_VMALLOC_VM_ALLOC)) + return (void *)start; + + tag = kasan_random_tag(); + start = set_tag(start, tag); + + /* Unpoison and initialize memory up to size. */ + kasan_unpoison(start, size, flags & KASAN_VMALLOC_INIT); + + /* + * Explicitly poison and initialize the in-page vmalloc() redzone. + * Unlike software KASAN modes, hardware tag-based KASAN doesn't + * unpoison memory when populating shadow for vmalloc() space. + */ + redzone_start = round_up((unsigned long)start + size, + KASAN_GRANULE_SIZE); + redzone_size = round_up(redzone_start, PAGE_SIZE) - redzone_start; + kasan_poison((void *)redzone_start, redzone_size, KASAN_TAG_INVALID, + flags & KASAN_VMALLOC_INIT); + + /* + * Set per-page tag flags to allow accessing physical memory for the + * vmalloc() mapping through page_address(vmalloc_to_page()). + */ + unpoison_vmalloc_pages(start, tag); + + return (void *)start; +} + +void __kasan_poison_vmalloc(const void *start, unsigned long size) +{ + /* + * No tagging here. + * The physical pages backing the vmalloc() allocation are poisoned + * through the usual page_alloc paths. + */ +} + +#endif + #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) void kasan_enable_tagging_sync(void) diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 5a866f6663fc..b958babc8fed 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -475,8 +475,16 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, } } -void *__kasan_unpoison_vmalloc(const void *start, unsigned long size) +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags) { + /* + * Software KASAN modes unpoison both VM_ALLOC and non-VM_ALLOC + * mappings, so the KASAN_VMALLOC_VM_ALLOC flag is ignored. + * Software KASAN modes can't optimize zeroing memory by combining it + * with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored. + */ + if (!is_vmalloc_or_module_addr(start)) return (void *)start; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8da8501db942..185ab3e27d13 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2237,8 +2237,12 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node) return NULL; } - /* Mark the pages as accessible, now that they are mapped. */ - mem = kasan_unpoison_vmalloc(mem, size); + /* + * Mark the pages as accessible, now that they are mapped. + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). + */ + mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_NONE); return mem; } @@ -2472,9 +2476,12 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, * best-effort approach, as they can be mapped outside of vmalloc code. * For VM_ALLOC mappings, the pages are marked as accessible after * getting mapped in __vmalloc_node_range(). + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ if (!(flags & VM_ALLOC)) - area->addr = kasan_unpoison_vmalloc(area->addr, requested_size); + area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, + KASAN_VMALLOC_NONE); return area; } @@ -3084,6 +3091,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, { struct vm_struct *area; void *ret; + kasan_vmalloc_flags_t kasan_flags; unsigned long real_size = size; unsigned long real_align = align; unsigned int shift = PAGE_SHIFT; @@ -3136,21 +3144,39 @@ again: goto fail; } - /* - * Modify protection bits to allow tagging. - * This must be done before mapping by __vmalloc_area_node(). - */ + /* Prepare arguments for __vmalloc_area_node(). */ if (kasan_hw_tags_enabled() && - pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) + pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) { + /* + * Modify protection bits to allow tagging. + * This must be done before mapping in __vmalloc_area_node(). + */ prot = arch_vmap_pgprot_tagged(prot); + /* + * Skip page_alloc poisoning and zeroing for physical pages + * backing VM_ALLOC mapping. Memory is instead poisoned and + * zeroed by kasan_unpoison_vmalloc(). + */ + gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO; + } + /* Allocate physical pages and map them into vmalloc space. */ ret = __vmalloc_area_node(area, gfp_mask, prot, shift, node); if (!ret) goto fail; - /* Mark the pages as accessible, now that they are mapped. */ - area->addr = kasan_unpoison_vmalloc(area->addr, real_size); + /* + * Mark the pages as accessible, now that they are mapped. + * The init condition should match the one in post_alloc_hook() + * (except for the should_skip_init() check) to make sure that memory + * is initialized under the same conditions regardless of the enabled + * KASAN mode. + */ + kasan_flags = KASAN_VMALLOC_VM_ALLOC; + if (!want_init_on_free() && want_init_on_alloc(gfp_mask)) + kasan_flags |= KASAN_VMALLOC_INIT; + area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED @@ -3850,10 +3876,13 @@ retry: /* * Mark allocated areas as accessible. Do it now as a best-effort * approach, as they can be mapped outside of vmalloc code. + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ for (area = 0; area < nr_vms; area++) vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, - vms[area]->size); + vms[area]->size, + KASAN_VMALLOC_NONE); kfree(vas); return vms; -- cgit v1.2.3 From f6e39794f4b6da7ca9b77f2f9ad11fd6f0ac83e5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:35 -0700 Subject: kasan, vmalloc: only tag normal vmalloc allocations The kernel can use to allocate executable memory. The only supported way to do that is via __vmalloc_node_range() with the executable bit set in the prot argument. (vmap() resets the bit via pgprot_nx()). Once tag-based KASAN modes start tagging vmalloc allocations, executing code from such allocations will lead to the PC register getting a tag, which is not tolerated by the kernel. Only tag the allocations for normal kernel pages. [andreyknvl@google.com: pass KASAN_VMALLOC_PROT_NORMAL to kasan_unpoison_vmalloc()] Link: https://lkml.kernel.org/r/9230ca3d3e40ffca041c133a524191fd71969a8d.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: support tagged vmalloc mappings] Link: https://lkml.kernel.org/r/2f6605e3a358cf64d73a05710cb3da356886ad29.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: don't unintentionally disabled poisoning] Link: https://lkml.kernel.org/r/de4587d6a719232e83c760113e46ed2d4d8da61e.1646757322.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/fbfd9939a4dc375923c9a5c6b9e7ab05c26b8c6b.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 7 ++++--- kernel/scs.c | 12 ++++++++---- mm/kasan/hw_tags.c | 7 +++++++ mm/kasan/shadow.c | 8 ++++++++ mm/vmalloc.c | 49 +++++++++++++++++++++++++++++-------------------- 5 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 65fc5285bb59..903b9453931d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -28,9 +28,10 @@ struct kunit_kasan_expectation { typedef unsigned int __bitwise kasan_vmalloc_flags_t; -#define KASAN_VMALLOC_NONE 0x00u -#define KASAN_VMALLOC_INIT 0x01u -#define KASAN_VMALLOC_VM_ALLOC 0x02u +#define KASAN_VMALLOC_NONE 0x00u +#define KASAN_VMALLOC_INIT 0x01u +#define KASAN_VMALLOC_VM_ALLOC 0x02u +#define KASAN_VMALLOC_PROT_NORMAL 0x04u #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/kernel/scs.c b/kernel/scs.c index b83bc9251f99..b7e1b096d906 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -32,15 +32,19 @@ static void *__scs_alloc(int node) for (i = 0; i < NR_CACHED_SCS; i++) { s = this_cpu_xchg(scs_cache[i], NULL); if (s) { - kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_NONE); + s = kasan_unpoison_vmalloc(s, SCS_SIZE, + KASAN_VMALLOC_PROT_NORMAL); memset(s, 0, SCS_SIZE); - return s; + goto out; } } - return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END, + s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_SCS, PAGE_KERNEL, 0, node, __builtin_return_address(0)); + +out: + return kasan_reset_tag(s); } void *scs_alloc(int node) @@ -78,7 +82,7 @@ void scs_free(void *s) if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) return; - kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_NONE); + kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL); vfree_atomic(s); } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 21104fd51872..2e9378a4f07f 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -247,6 +247,13 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, if (!(flags & KASAN_VMALLOC_VM_ALLOC)) return (void *)start; + /* + * Don't tag executable memory. + * The kernel doesn't tolerate having the PC register tagged. + */ + if (!(flags & KASAN_VMALLOC_PROT_NORMAL)) + return (void *)start; + tag = kasan_random_tag(); start = set_tag(start, tag); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index b958babc8fed..a4f07de21771 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -488,6 +488,14 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, if (!is_vmalloc_or_module_addr(start)) return (void *)start; + /* + * Don't tag executable memory with the tag-based mode. + * The kernel doesn't tolerate having the PC register tagged. + */ + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) && + !(flags & KASAN_VMALLOC_PROT_NORMAL)) + return (void *)start; + start = set_tag(start, kasan_random_tag()); kasan_unpoison(start, size, false); return (void *)start; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 185ab3e27d13..e163372d3967 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2242,7 +2242,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node) * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ - mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_NONE); + mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL); return mem; } @@ -2481,7 +2481,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, */ if (!(flags & VM_ALLOC)) area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, - KASAN_VMALLOC_NONE); + KASAN_VMALLOC_PROT_NORMAL); return area; } @@ -3091,7 +3091,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, { struct vm_struct *area; void *ret; - kasan_vmalloc_flags_t kasan_flags; + kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE; unsigned long real_size = size; unsigned long real_align = align; unsigned int shift = PAGE_SHIFT; @@ -3144,21 +3144,28 @@ again: goto fail; } - /* Prepare arguments for __vmalloc_area_node(). */ - if (kasan_hw_tags_enabled() && - pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) { - /* - * Modify protection bits to allow tagging. - * This must be done before mapping in __vmalloc_area_node(). - */ - prot = arch_vmap_pgprot_tagged(prot); + /* + * Prepare arguments for __vmalloc_area_node() and + * kasan_unpoison_vmalloc(). + */ + if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) { + if (kasan_hw_tags_enabled()) { + /* + * Modify protection bits to allow tagging. + * This must be done before mapping. + */ + prot = arch_vmap_pgprot_tagged(prot); - /* - * Skip page_alloc poisoning and zeroing for physical pages - * backing VM_ALLOC mapping. Memory is instead poisoned and - * zeroed by kasan_unpoison_vmalloc(). - */ - gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO; + /* + * Skip page_alloc poisoning and zeroing for physical + * pages backing VM_ALLOC mapping. Memory is instead + * poisoned and zeroed by kasan_unpoison_vmalloc(). + */ + gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO; + } + + /* Take note that the mapping is PAGE_KERNEL. */ + kasan_flags |= KASAN_VMALLOC_PROT_NORMAL; } /* Allocate physical pages and map them into vmalloc space. */ @@ -3172,10 +3179,13 @@ again: * (except for the should_skip_init() check) to make sure that memory * is initialized under the same conditions regardless of the enabled * KASAN mode. + * Tag-based KASAN modes only assign tags to normal non-executable + * allocations, see __kasan_unpoison_vmalloc(). */ - kasan_flags = KASAN_VMALLOC_VM_ALLOC; + kasan_flags |= KASAN_VMALLOC_VM_ALLOC; if (!want_init_on_free() && want_init_on_alloc(gfp_mask)) kasan_flags |= KASAN_VMALLOC_INIT; + /* KASAN_VMALLOC_PROT_NORMAL already set if required. */ area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); /* @@ -3881,8 +3891,7 @@ retry: */ for (area = 0; area < nr_vms; area++) vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, - vms[area]->size, - KASAN_VMALLOC_NONE); + vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); kfree(vas); return vms; -- cgit v1.2.3 From ed6d74446cbfb88c747d4b32477a9d46132694db Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:12:02 -0700 Subject: kasan: test: support async (again) and asymm modes for HW_TAGS Async mode support has already been implemented in commit e80a76aa1a91 ("kasan, arm64: tests supports for HW_TAGS async mode") but then got accidentally broken in commit 99734b535d9b ("kasan: detect false-positives in tests"). Restore the changes removed by the latter patch and adapt them for asymm mode: add a sync_fault flag to kunit_kasan_expectation that only get set if the MTE fault was synchronous, and reenable MTE on such faults in tests. Also rename kunit_kasan_expectation to kunit_kasan_status and move its definition to mm/kasan/kasan.h from include/linux/kasan.h, as this structure is only internally used by KASAN. Also put the structure definition under IS_ENABLED(CONFIG_KUNIT). Link: https://lkml.kernel.org/r/133970562ccacc93ba19d754012c562351d4a8c8.1645033139.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Vincenzo Frascino Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 ----- lib/test_kasan.c | 39 ++++++++++++++++++++++----------------- mm/kasan/hw_tags.c | 18 +++++++++--------- mm/kasan/kasan.h | 14 ++++++++++++-- mm/kasan/report.c | 17 +++++++++-------- 5 files changed, 52 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 903b9453931d..fe36215807f7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -19,11 +19,6 @@ struct task_struct; #include #include -/* kasan_data struct is used in KUnit tests for KASAN expected failures */ -struct kunit_kasan_expectation { - bool report_found; -}; - #endif typedef unsigned int __bitwise kasan_vmalloc_flags_t; diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 2addd0c66acf..72391f992689 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -37,7 +37,7 @@ void *kasan_ptr_result; int kasan_int_result; static struct kunit_resource resource; -static struct kunit_kasan_expectation fail_data; +static struct kunit_kasan_status test_status; static bool multishot; /* @@ -54,58 +54,63 @@ static int kasan_test_init(struct kunit *test) } multishot = kasan_save_enable_multi_shot(); - fail_data.report_found = false; + test_status.report_found = false; + test_status.sync_fault = false; kunit_add_named_resource(test, NULL, NULL, &resource, - "kasan_data", &fail_data); + "kasan_status", &test_status); return 0; } static void kasan_test_exit(struct kunit *test) { kasan_restore_multi_shot(multishot); - KUNIT_EXPECT_FALSE(test, fail_data.report_found); + KUNIT_EXPECT_FALSE(test, test_status.report_found); } /** * KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a * KASAN report; causes a test failure otherwise. This relies on a KUnit - * resource named "kasan_data". Do not use this name for KUnit resources + * resource named "kasan_status". Do not use this name for KUnit resources * outside of KASAN tests. * - * For hardware tag-based KASAN in sync mode, when a tag fault happens, tag + * For hardware tag-based KASAN, when a synchronous tag fault happens, tag * checking is auto-disabled. When this happens, this test handler reenables * tag checking. As tag checking can be only disabled or enabled per CPU, * this handler disables migration (preemption). * - * Since the compiler doesn't see that the expression can change the fail_data + * Since the compiler doesn't see that the expression can change the test_status * fields, it can reorder or optimize away the accesses to those fields. * Use READ/WRITE_ONCE() for the accesses and compiler barriers around the * expression to prevent that. * - * In between KUNIT_EXPECT_KASAN_FAIL checks, fail_data.report_found is kept as - * false. This allows detecting KASAN reports that happen outside of the checks - * by asserting !fail_data.report_found at the start of KUNIT_EXPECT_KASAN_FAIL - * and in kasan_test_exit. + * In between KUNIT_EXPECT_KASAN_FAIL checks, test_status.report_found is kept + * as false. This allows detecting KASAN reports that happen outside of the + * checks by asserting !test_status.report_found at the start of + * KUNIT_EXPECT_KASAN_FAIL and in kasan_test_exit. */ #define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ kasan_sync_fault_possible()) \ migrate_disable(); \ - KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found)); \ + KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); \ barrier(); \ expression; \ barrier(); \ - if (!READ_ONCE(fail_data.report_found)) { \ + if (kasan_async_fault_possible()) \ + kasan_force_async_fault(); \ + if (!READ_ONCE(test_status.report_found)) { \ KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure " \ "expected in \"" #expression \ "\", but none occurred"); \ } \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) { \ - if (READ_ONCE(fail_data.report_found)) \ - kasan_enable_tagging_sync(); \ + if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ + kasan_sync_fault_possible()) { \ + if (READ_ONCE(test_status.report_found) && \ + READ_ONCE(test_status.sync_fault)) \ + kasan_enable_tagging(); \ migrate_enable(); \ } \ - WRITE_ONCE(fail_data.report_found, false); \ + WRITE_ONCE(test_status.report_found, false); \ } while (0) #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index fad1887e54c0..07a76c46daa5 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -172,12 +172,7 @@ void kasan_init_hw_tags_cpu(void) * Enable async or asymm modes only when explicitly requested * through the command line. */ - if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) - hw_enable_tagging_async(); - else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM) - hw_enable_tagging_asymm(); - else - hw_enable_tagging_sync(); + kasan_enable_tagging(); } /* kasan_init_hw_tags() is called once on boot CPU. */ @@ -343,11 +338,16 @@ void __kasan_poison_vmalloc(const void *start, unsigned long size) #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -void kasan_enable_tagging_sync(void) +void kasan_enable_tagging(void) { - hw_enable_tagging_sync(); + if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) + hw_enable_tagging_async(); + else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM) + hw_enable_tagging_asymm(); + else + hw_enable_tagging_sync(); } -EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync); +EXPORT_SYMBOL_GPL(kasan_enable_tagging); void kasan_force_async_fault(void) { diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 4d67408e8407..d1e111b7d5d8 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,6 +7,16 @@ #include #include +#if IS_ENABLED(CONFIG_KUNIT) + +/* Used in KUnit-compatible KASAN tests. */ +struct kunit_kasan_status { + bool report_found; + bool sync_fault; +}; + +#endif + #ifdef CONFIG_KASAN_HW_TAGS #include @@ -350,12 +360,12 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -void kasan_enable_tagging_sync(void); +void kasan_enable_tagging(void); void kasan_force_async_fault(void); #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ -static inline void kasan_enable_tagging_sync(void) { } +static inline void kasan_enable_tagging(void) { } static inline void kasan_force_async_fault(void) { } #endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index f14146563d41..137c2c0b09db 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -336,20 +336,21 @@ static bool report_enabled(void) } #if IS_ENABLED(CONFIG_KUNIT) -static void kasan_update_kunit_status(struct kunit *cur_test) +static void kasan_update_kunit_status(struct kunit *cur_test, bool sync) { struct kunit_resource *resource; - struct kunit_kasan_expectation *kasan_data; + struct kunit_kasan_status *status; - resource = kunit_find_named_resource(cur_test, "kasan_data"); + resource = kunit_find_named_resource(cur_test, "kasan_status"); if (!resource) { kunit_set_failure(cur_test); return; } - kasan_data = (struct kunit_kasan_expectation *)resource->data; - WRITE_ONCE(kasan_data->report_found, true); + status = (struct kunit_kasan_status *)resource->data; + WRITE_ONCE(status->report_found, true); + WRITE_ONCE(status->sync_fault, sync); kunit_put_resource(resource); } #endif /* IS_ENABLED(CONFIG_KUNIT) */ @@ -363,7 +364,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, true); #endif /* IS_ENABLED(CONFIG_KUNIT) */ start_report(&flags); @@ -383,7 +384,7 @@ void kasan_report_async(void) #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, false); #endif /* IS_ENABLED(CONFIG_KUNIT) */ start_report(&flags); @@ -405,7 +406,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, true); #endif /* IS_ENABLED(CONFIG_KUNIT) */ disable_trace_on_warning(); -- cgit v1.2.3 From 80207910cd71b4e0e87140d165d82b5d3ff69e53 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:13:12 -0700 Subject: kasan: move and hide kasan_save_enable/restore_multi_shot - Move kasan_save_enable/restore_multi_shot() declarations to mm/kasan/kasan.h, as there is no need for them to be visible outside of KASAN implementation. - Only define and export these functions when KASAN tests are enabled. - Move their definitions closer to other test-related code in report.c. Link: https://lkml.kernel.org/r/6ba637333b78447f027d775f2d55ab1a40f63c99.1646237226.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ---- mm/kasan/kasan.h | 7 +++++++ mm/kasan/report.c | 30 +++++++++++++++++------------- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe36215807f7..ceebcb9de7bf 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -267,10 +267,6 @@ static __always_inline bool kasan_check_byte(const void *addr) return true; } - -bool kasan_save_enable_multi_shot(void); -void kasan_restore_multi_shot(bool enabled); - #else /* CONFIG_KASAN */ static inline slab_flags_t kasan_never_merge(void) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 9d2e128eb623..d79b83d673b1 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -492,6 +492,13 @@ static inline bool kasan_arch_is_ready(void) { return true; } #error kasan_arch_is_ready only works in KASAN generic outline mode! #endif +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST) + +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + +#endif + /* * Exported functions for interfaces called from assembly or from generated * code. Declarations here to avoid warning about missing declarations. diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 7ef3b0455603..c9bfffe931b4 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -64,19 +64,6 @@ static int __init early_kasan_fault(char *arg) } early_param("kasan.fault", early_kasan_fault); -bool kasan_save_enable_multi_shot(void) -{ - return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); -} -EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); - -void kasan_restore_multi_shot(bool enabled) -{ - if (!enabled) - clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); -} -EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); - static int __init kasan_set_multi_shot(char *str) { set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); @@ -109,6 +96,23 @@ static bool report_enabled(void) return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); } +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST) + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +#endif + #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) static void update_kunit_status(bool sync) { -- cgit v1.2.3 From 562beb7235abfebdd8366e0664a5c3d1e597b990 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 24 Mar 2022 18:13:27 -0700 Subject: mm/huge_memory: make is_transparent_hugepage() static It's only used inside the huge_memory.c now. Don't export it and make it static. We can thus reduce the size of huge_memory.o a bit. Without this patch: text data bss dec hex filename 32319 2965 4 35288 89d8 mm/huge_memory.o With this patch: text data bss dec hex filename 32042 2957 4 35003 88bb mm/huge_memory.o Link: https://lkml.kernel.org/r/20220302082145.12028-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Muchun Song Reviewed-by: Yang Shi Cc: Matthew Wilcox Cc: William Kucharski Cc: Hugh Dickins Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 6 ------ mm/huge_memory.c | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0734aff8fa19..2999190adc22 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -183,7 +183,6 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, void prep_transhuge_page(struct page *page); void free_transhuge_page(struct page *page); -bool is_transparent_hugepage(struct page *page); bool can_split_folio(struct folio *folio, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); @@ -341,11 +340,6 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, static inline void prep_transhuge_page(struct page *page) {} -static inline bool is_transparent_hugepage(struct page *page) -{ - return false; -} - #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 035aa204ab8d..057b13bde82d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -533,7 +533,7 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -bool is_transparent_hugepage(struct page *page) +static inline bool is_transparent_hugepage(struct page *page) { if (!PageCompound(page)) return false; @@ -542,7 +542,6 @@ bool is_transparent_hugepage(struct page *page) return is_huge_zero_page(page) || page[1].compound_dtor == TRANSHUGE_PAGE_DTOR; } -EXPORT_SYMBOL_GPL(is_transparent_hugepage); static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, -- cgit v1.2.3 From 03104c2c5db8918030788e607e4af980b2f42bb3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Mar 2022 18:13:50 -0700 Subject: mm/swapfile: remove stale reuse_swap_page() All users are gone, let's remove it. We'll let SWP_STABLE_WRITES stick around for now, as it might come in handy in the near future. Link: https://lkml.kernel.org/r/20220131162940.210846-8-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Liang Zhang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oleg Nesterov Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 -- mm/swapfile.c | 104 --------------------------------------------------- 2 files changed, 108 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index f37837c614c5..27093b477c5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -515,7 +515,6 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern bool reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); @@ -681,9 +680,6 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page) \ - (page_trans_huge_mapcount(page) == 1) - static inline int try_to_free_swap(struct page *page) { return 0; diff --git a/mm/swapfile.c b/mm/swapfile.c index 33c7abb16610..63c61f8b2611 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1167,16 +1167,6 @@ out: return NULL; } -static struct swap_info_struct *swap_info_get(swp_entry_t entry) -{ - struct swap_info_struct *p; - - p = _swap_info_get(entry); - if (p) - spin_lock(&p->lock); - return p; -} - static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, struct swap_info_struct *q) { @@ -1601,100 +1591,6 @@ static bool page_swapped(struct page *page) return false; } -static int page_trans_huge_map_swapcount(struct page *page, - int *total_swapcount) -{ - int i, map_swapcount, _total_swapcount; - unsigned long offset = 0; - struct swap_info_struct *si; - struct swap_cluster_info *ci = NULL; - unsigned char *map = NULL; - int swapcount = 0; - - /* hugetlbfs shouldn't call it */ - VM_BUG_ON_PAGE(PageHuge(page), page); - - if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) { - if (PageSwapCache(page)) - swapcount = page_swapcount(page); - if (total_swapcount) - *total_swapcount = swapcount; - return swapcount + page_trans_huge_mapcount(page); - } - - page = compound_head(page); - - _total_swapcount = map_swapcount = 0; - if (PageSwapCache(page)) { - swp_entry_t entry; - - entry.val = page_private(page); - si = _swap_info_get(entry); - if (si) { - map = si->swap_map; - offset = swp_offset(entry); - } - } - if (map) - ci = lock_cluster(si, offset); - for (i = 0; i < HPAGE_PMD_NR; i++) { - int mapcount = atomic_read(&page[i]._mapcount) + 1; - if (map) { - swapcount = swap_count(map[offset + i]); - _total_swapcount += swapcount; - } - map_swapcount = max(map_swapcount, mapcount + swapcount); - } - unlock_cluster(ci); - - if (PageDoubleMap(page)) - map_swapcount -= 1; - - if (total_swapcount) - *total_swapcount = _total_swapcount; - - return map_swapcount + compound_mapcount(page); -} - -/* - * We can write to an anon page without COW if there are no other references - * to it. And as a side-effect, free up its swap: because the old content - * on disk will never be read, and seeking back there to write new content - * later would only waste time away from clustering. - */ -bool reuse_swap_page(struct page *page) -{ - int count, total_swapcount; - - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (unlikely(PageKsm(page))) - return false; - count = page_trans_huge_map_swapcount(page, &total_swapcount); - if (count == 1 && PageSwapCache(page) && - (likely(!PageTransCompound(page)) || - /* The remaining swap count will be freed soon */ - total_swapcount == page_swapcount(page))) { - if (!PageWriteback(page)) { - page = compound_head(page); - delete_from_swap_cache(page); - SetPageDirty(page); - } else { - swp_entry_t entry; - struct swap_info_struct *p; - - entry.val = page_private(page); - p = swap_info_get(entry); - if (p->flags & SWP_STABLE_WRITES) { - spin_unlock(&p->lock); - return false; - } - spin_unlock(&p->lock); - } - } - - return count <= 1; -} - /* * If swap is getting full, or if there are no more mappings of this page, * then try_to_free_swap is called to free its swap space. -- cgit v1.2.3 From 55c62fa7c53368a9011cd1276c001a1469078c6a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 24 Mar 2022 18:13:53 -0700 Subject: mm/huge_memory: remove stale page_trans_huge_mapcount() All users are gone, let's remove it. Link: https://lkml.kernel.org/r/20220131162940.210846-9-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: David Rientjes Cc: Don Dutile Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Liang Zhang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Cc: Oleg Nesterov Cc: Peter Xu Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ----- mm/huge_memory.c | 48 ------------------------------------------------ 2 files changed, 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a3dd7e617e4..e34edb775334 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -834,16 +834,11 @@ static inline int total_mapcount(struct page *page) return folio_mapcount(page_folio(page)); } -int page_trans_huge_mapcount(struct page *page); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } -static inline int page_trans_huge_mapcount(struct page *page) -{ - return page_mapcount(page); -} #endif static inline struct page *virt_to_head_page(const void *x) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a5f651c6df4e..cc580d90117b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2483,54 +2483,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, } } -/* - * This calculates accurately how many mappings a transparent hugepage - * has (unlike page_mapcount() which isn't fully accurate). This full - * accuracy is primarily needed to know if copy-on-write faults can - * reuse the page and change the mapping to read-write instead of - * copying them. At the same time this returns the total_mapcount too. - * - * The function returns the highest mapcount any one of the subpages - * has. If the return value is one, even if different processes are - * mapping different subpages of the transparent hugepage, they can - * all reuse it, because each process is reusing a different subpage. - * - * The total_mapcount is instead counting all virtual mappings of the - * subpages. If the total_mapcount is equal to "one", it tells the - * caller all mappings belong to the same "mm" and in turn the - * anon_vma of the transparent hugepage can become the vma->anon_vma - * local one as no other process may be mapping any of the subpages. - * - * It would be more accurate to replace page_mapcount() with - * page_trans_huge_mapcount(), however we only use - * page_trans_huge_mapcount() in the copy-on-write faults where we - * need full accuracy to avoid breaking page pinning, because - * page_trans_huge_mapcount() is slower than page_mapcount(). - */ -int page_trans_huge_mapcount(struct page *page) -{ - int i, ret; - - /* hugetlbfs shouldn't call it */ - VM_BUG_ON_PAGE(PageHuge(page), page); - - if (likely(!PageTransCompound(page))) - return atomic_read(&page->_mapcount) + 1; - - page = compound_head(page); - - ret = 0; - for (i = 0; i < thp_nr_pages(page); i++) { - int mapcount = atomic_read(&page[i]._mapcount) + 1; - ret = max(ret, mapcount); - } - - if (PageDoubleMap(page)) - ret -= 1; - - return ret + compound_mapcount(page); -} - /* Racy check whether the huge page can be split */ bool can_split_folio(struct folio *folio, int *pextra_pins) { -- cgit v1.2.3 From 566d3362885aab04d6b0f885f12db3176ca3a032 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 24 Mar 2022 18:13:59 -0700 Subject: mm: warn on deleting redirtied only if accounted filemap_unaccount_folio() has a WARN_ON_ONCE(folio_test_dirty(folio)). It is good to warn of late dirtying on a persistent filesystem, but late dirtying on tmpfs can only lose data which is expected to be thrown away; and it's a pity if that warning comes ONCE on tmpfs, then hides others which really matter. Make it conditional on mapping_cap_writeback(). Cleanup: then folio_account_cleaned() no longer needs to check that for itself, and so no longer needs to know the mapping. Link: https://lkml.kernel.org/r/b5a1106c-7226-a5c6-ad41-ad4832cae1f@google.com Signed-off-by: Hugh Dickins Cc: Matthew Wilcox Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 3 +-- mm/filemap.c | 14 +++++++++----- mm/page-writeback.c | 18 ++++++++---------- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 58f395f3febe..a8d0b327b066 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1009,8 +1009,7 @@ static inline void __set_page_dirty(struct page *page, { __folio_mark_dirty(page_folio(page), mapping, warn); } -void folio_account_cleaned(struct folio *folio, struct address_space *mapping, - struct bdi_writeback *wb); +void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { diff --git a/mm/filemap.c b/mm/filemap.c index c5c169c16bae..8426434042f4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -193,16 +193,20 @@ static void filemap_unaccount_folio(struct address_space *mapping, /* * At this point folio must be either written or cleaned by * truncate. Dirty folio here signals a bug and loss of - * unwritten data. + * unwritten data - on ordinary filesystems. * - * This fixes dirty accounting after removing the folio entirely + * But it's harmless on in-memory filesystems like tmpfs; and can + * occur when a driver which did get_user_pages() sets page dirty + * before putting it, while the inode is being finally evicted. + * + * Below fixes dirty accounting after removing the folio entirely * but leaves the dirty flag set: it has no effect for truncated * folio and anyway will be cleared before returning folio to * buddy allocator. */ - if (WARN_ON_ONCE(folio_test_dirty(folio))) - folio_account_cleaned(folio, mapping, - inode_to_wb(mapping->host)); + if (WARN_ON_ONCE(folio_test_dirty(folio) && + mapping_can_writeback(mapping))) + folio_account_cleaned(folio, inode_to_wb(mapping->host)); } /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 435c02630593..7e2da284e427 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2465,16 +2465,14 @@ static void folio_account_dirtied(struct folio *folio, * * Caller must hold lock_page_memcg(). */ -void folio_account_cleaned(struct folio *folio, struct address_space *mapping, - struct bdi_writeback *wb) +void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb) { - if (mapping_can_writeback(mapping)) { - long nr = folio_nr_pages(folio); - lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); - wb_stat_mod(wb, WB_RECLAIMABLE, -nr); - task_io_account_cancelled_write(nr * PAGE_SIZE); - } + long nr = folio_nr_pages(folio); + + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + task_io_account_cancelled_write(nr * PAGE_SIZE); } /* @@ -2683,7 +2681,7 @@ void __folio_cancel_dirty(struct folio *folio) wb = unlocked_inode_to_wb_begin(inode, &cookie); if (folio_test_clear_dirty(folio)) - folio_account_cleaned(folio, mapping, wb); + folio_account_cleaned(folio, wb); unlocked_inode_to_wb_end(inode, &cookie); folio_memcg_unlock(folio); -- cgit v1.2.3