author | Linus Torvalds | 2022-05-26 12:32:41 -0700
committer | Linus Torvalds | 2022-05-26 12:32:41 -0700
commit | 98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree | 44683fc4a92efa614acdca2742a7ff19d26da1e3 /mm/memory-failure.c
parent | df202b452fe6c6d6f1351bad485e2367ef1e644e (diff)
parent | f403f22f8ccb12860b2b62fec3173c6ccd45938b (diff)
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
"Almost all of MM here. A few things are still getting finished off,
reviewed, etc.
- Yang Shi has improved the behaviour of khugepaged collapsing of
readonly file-backed transparent hugepages.
- Johannes Weiner has arranged for zswap memory use to be tracked and
managed on a per-cgroup basis.
- Muchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
runtime enablement of the recent huge page vmemmap optimization
feature.
- Baolin Wang contributes a series to fix some issues around hugetlb
pagetable invalidation.
- Zhenwei Pi has fixed some interactions between hwpoisoned pages and
virtualization.
- Tong Tiangen has enabled the use of the presently x86-only
page_table_check debugging feature on arm64 and riscv.
- David Vernet has done some fixup work on the memcg selftests.
- Peter Xu has taught userfaultfd to handle write protection faults
against shmem- and hugetlbfs-backed files.
- More DAMON development from SeongJae Park - adding online tuning of
the feature and support for monitoring of fixed virtual address
ranges. Also easier discovery of which monitoring operations are
available.
- Nadav Amit has done some optimization of TLB flushing during
mprotect().
- Neil Brown continues to labor away at improving our swap-over-NFS
support.
- David Hildenbrand has some fixes to anon page COWing versus
get_user_pages().
- Peng Liu fixed some errors in the core hugetlb code.
- Joao Martins has reduced the amount of memory consumed by
device-dax's compound devmaps.
- Some cleanups of the arch-specific pagemap code from Anshuman
Khandual.
- Muchun Song has found and fixed some errors in the TLB flushing of
transparent hugepages.
- Roman Gushchin has done more work on the memcg selftests.
... and, of course, many smaller fixes and cleanups. Notably, the
customary million cleanup series from Miaohe Lin"
* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
mm: kfence: use PAGE_ALIGNED helper
selftests: vm: add the "settings" file with timeout variable
selftests: vm: add "test_hmm.sh" to TEST_FILES
selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
selftests: vm: add migration to the .gitignore
selftests/vm/pkeys: fix typo in comment
ksm: fix typo in comment
selftests: vm: add process_mrelease tests
Revert "mm/vmscan: never demote for memcg reclaim"
mm/kfence: print disabling or re-enabling message
include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
mm: fix a potential infinite loop in start_isolate_page_range()
MAINTAINERS: add Muchun as co-maintainer for HugeTLB
zram: fix Kconfig dependency warning
mm/shmem: fix shmem folio swapoff hang
cgroup: fix an error handling path in alloc_pagecache_max_30M()
mm: damon: use HPAGE_PMD_SIZE
tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
nodemask.h: fix compilation error with GCC12
...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 126
1 file changed, 63 insertions, 63 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d4a4adcca01f..b85661cbdc4a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -59,6 +59,7 @@
 #include <linux/page-isolation.h>
 #include <linux/pagewalk.h>
 #include <linux/shmem_fs.h>
+#include "swap.h"
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -484,7 +485,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = folio_lock_anon_vma_read(folio);
+	av = folio_lock_anon_vma_read(folio, NULL);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
 
@@ -622,7 +623,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
 static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
 			      unsigned long end, struct mm_walk *walk)
 {
-	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	struct hwp_walk *hwp = walk->private;
 	int ret = 0;
 	pte_t *ptep, *mapped_pte;
 	spinlock_t *ptl;
@@ -656,7 +657,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 			    unsigned long addr, unsigned long end,
 			    struct mm_walk *walk)
 {
-	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	struct hwp_walk *hwp = walk->private;
 	pte_t pte = huge_ptep_get(ptep);
 	struct hstate *h = hstate_vma(walk->vma);
 
@@ -733,7 +734,6 @@ static const char * const action_page_types[] = {
 	[MF_MSG_BUDDY] = "free buddy page",
 	[MF_MSG_DAX] = "dax page",
 	[MF_MSG_UNSPLIT_THP] = "unsplit thp",
-	[MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size",
 	[MF_MSG_UNKNOWN] = "unknown page",
 };
 
@@ -1041,12 +1041,11 @@ static int me_huge_page(struct page_state *ps, struct page *p)
 		res = MF_FAILED;
 		unlock_page(hpage);
 		/*
-		 * migration entry prevents later access on error anonymous
-		 * hugepage, so we can free and dissolve it into buddy to
-		 * save healthy subpages.
+		 * migration entry prevents later access on error hugepage,
+		 * so we can free and dissolve it into buddy to save healthy
+		 * subpages.
 		 */
-		if (PageAnon(hpage))
-			put_page(hpage);
+		put_page(hpage);
 		if (__page_handle_poison(p)) {
 			page_ref_inc(p);
 			res = MF_RECOVERED;
@@ -1133,6 +1132,7 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
 {
 	trace_memory_failure_event(pfn, type, result);
 
+	num_poisoned_pages_inc();
 	pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);
 }
@@ -1179,13 +1179,11 @@ void ClearPageHWPoisonTakenOff(struct page *page)
  */
 static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
 {
-	bool movable = false;
-
-	/* Soft offline could mirgate non-LRU movable pages */
+	/* Soft offline could migrate non-LRU movable pages */
 	if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
-		movable = true;
+		return true;
 
-	return movable || PageLRU(page) || is_free_buddy_page(page);
+	return PageLRU(page) || is_free_buddy_page(page);
 }
 
 static int __get_hwpoison_page(struct page *page, unsigned long flags)
@@ -1521,7 +1519,9 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 	if (flags & MF_COUNT_INCREASED) {
 		ret = 1;
 		count_increased = true;
-	} else if (HPageFreed(head) || HPageMigratable(head)) {
+	} else if (HPageFreed(head)) {
+		ret = 0;
+	} else if (HPageMigratable(head)) {
 		ret = get_page_unless_zero(head);
 		if (ret)
 			count_increased = true;
@@ -1588,8 +1588,6 @@ retry:
 		goto out;
 	}
 
-	num_poisoned_pages_inc();
-
 	/*
 	 * Handling free hugepage. The possible race with hugepage allocation
 	 * or demotion can be prevented by PageHWPoison flag.
@@ -1605,16 +1603,6 @@ retry:
 		return res == MF_RECOVERED ? 0 : -EBUSY;
 	}
 
-	/*
-	 * The page could have changed compound pages due to race window.
-	 * If this happens just bail out.
-	 */
-	if (!PageHuge(p) || compound_head(p) != head) {
-		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
-		res = -EBUSY;
-		goto out;
-	}
-
 	page_flags = head->flags;
 
 	/*
@@ -1762,7 +1750,7 @@ static DEFINE_MUTEX(mf_mutex);
  * enabled and no spinlocks hold.
  *
  * Return: 0 for successfully handled the memory error,
- *         -EOPNOTSUPP for memory_filter() filtered the error event,
+ *         -EOPNOTSUPP for hwpoison_filter() filtered the error event,
  *         < 0(except -EOPNOTSUPP) on failure.
  */
 int memory_failure(unsigned long pfn, int flags)
@@ -1811,11 +1799,12 @@ try_again:
 		res = -EHWPOISON;
 		if (flags & MF_ACTION_REQUIRED)
 			res = kill_accessing_process(current, pfn, flags);
+		if (flags & MF_COUNT_INCREASED)
+			put_page(p);
 		goto unlock_mutex;
 	}
 
 	hpage = compound_head(p);
-	num_poisoned_pages_inc();
 
 	/*
 	 * We need/can do nothing about count=0 pages.
@@ -1839,7 +1828,6 @@ try_again:
 				/* We lost the race, try again */
 				if (retry) {
 					ClearPageHWPoison(p);
-					num_poisoned_pages_dec();
 					retry = false;
 					goto try_again;
 				}
@@ -1902,8 +1890,7 @@ try_again:
 	 */
 	if (PageCompound(p)) {
 		if (retry) {
-			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_dec();
+			ClearPageHWPoison(p);
 			unlock_page(p);
 			put_page(p);
 			flags &= ~MF_COUNT_INCREASED;
@@ -1925,8 +1912,7 @@ try_again:
 	page_flags = p->flags;
 
 	if (hwpoison_filter(p)) {
-		if (TestClearPageHWPoison(p))
-			num_poisoned_pages_dec();
+		TestClearPageHWPoison(p);
 		unlock_page(p);
 		put_page(p);
 		res = -EOPNOTSUPP;
@@ -2088,28 +2074,6 @@ core_initcall(memory_failure_init);
 		pr_info(fmt, pfn);	\
 })
 
-static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
-{
-	if (TestClearPageHWPoison(p)) {
-		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
-				 page_to_pfn(p), rs);
-		num_poisoned_pages_dec();
-		return 1;
-	}
-	return 0;
-}
-
-static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
-		struct page *p)
-{
-	if (put_page_back_buddy(p)) {
-		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
-				 page_to_pfn(p), rs);
-		return 0;
-	}
-	return -EBUSY;
-}
-
 /**
  * unpoison_memory - Unpoison a previously poisoned page
  * @pfn: Page number of the to be unpoisoned page
@@ -2127,6 +2091,7 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int ret = -EBUSY;
+	int freeit = 0;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -2167,18 +2132,15 @@ int unpoison_memory(unsigned long pfn)
 
 	ret = get_hwpoison_page(p, MF_UNPOISON);
 	if (!ret) {
-		if (clear_page_hwpoison(&unpoison_rs, page))
-			ret = 0;
-		else
-			ret = -EBUSY;
+		ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
 	} else if (ret < 0) {
 		if (ret == -EHWPOISON) {
-			ret = unpoison_taken_off_page(&unpoison_rs, p);
+			ret = put_page_back_buddy(p) ? 0 : -EBUSY;
 		} else
 			unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
 					 pfn, &unpoison_rs);
 	} else {
-		int freeit = clear_page_hwpoison(&unpoison_rs, p);
+		freeit = !!TestClearPageHWPoison(p);
 
 		put_page(page);
 		if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
@@ -2189,6 +2151,11 @@ unlock_mutex:
 	mutex_unlock(&mf_mutex);
+	if (!ret || freeit) {
+		num_poisoned_pages_dec();
+		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+				 page_to_pfn(p), &unpoison_rs);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(unpoison_memory);
@@ -2323,7 +2290,9 @@ static void put_ref_page(struct page *page)
  * @pfn: pfn to soft-offline
 * @flags: flags. Same as memory_failure().
  *
- * Returns 0 on success, otherwise negated errno.
+ * Returns 0 on success
+ *         -EOPNOTSUPP for hwpoison_filter() filtered the error event
+ *         < 0 otherwise negated errno.
  *
  * Soft offline a page, by migration or invalidation,
  * without killing anything. This is for the case when
@@ -2374,6 +2343,16 @@ retry:
 	ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
 	put_online_mems();
 
+	if (hwpoison_filter(page)) {
+		if (ret > 0)
+			put_page(page);
+		else
+			put_ref_page(ref_page);
+
+		mutex_unlock(&mf_mutex);
+		return -EOPNOTSUPP;
+	}
+
 	if (ret > 0) {
 		ret = soft_offline_in_use_page(page);
 	} else if (ret == 0) {
@@ -2388,3 +2367,24 @@ retry:
 
 	return ret;
 }
+
+void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+{
+	int i;
+
+	/*
+	 * A further optimization is to have per section refcounted
+	 * num_poisoned_pages. But that would need more space per memmap, so
+	 * for now just do a quick global check to speed up this routine in the
+	 * absence of bad pages.
+	 */
+	if (atomic_long_read(&num_poisoned_pages) == 0)
+		return;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (PageHWPoison(&memmap[i])) {
+			num_poisoned_pages_dec();
+			ClearPageHWPoison(&memmap[i]);
+		}
+	}
+}