diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/balloon_compaction.c | 2 | ||||
-rw-r--r-- | mm/debug.c | 6 | ||||
-rw-r--r-- | mm/huge_memory.c | 7 | ||||
-rw-r--r-- | mm/hugetlb.c | 11 | ||||
-rw-r--r-- | mm/internal.h | 5 | ||||
-rw-r--r-- | mm/kasan/report.c | 1 | ||||
-rw-r--r-- | mm/ksm.c | 3 | ||||
-rw-r--r-- | mm/madvise.c | 1 | ||||
-rw-r--r-- | mm/memory.c | 43 | ||||
-rw-r--r-- | mm/migrate.c | 6 | ||||
-rw-r--r-- | mm/mprotect.c | 5 | ||||
-rw-r--r-- | mm/mremap.c | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 13 | ||||
-rw-r--r-- | mm/page_io.c | 7 | ||||
-rw-r--r-- | mm/rmap.c | 88 | ||||
-rw-r--r-- | mm/shmem.c | 12 | ||||
-rw-r--r-- | mm/util.c | 2 | ||||
-rw-r--r-- | mm/zsmalloc.c | 1 |
18 files changed, 158 insertions, 63 deletions
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 9075aa54e955..b06d9fe23a28 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) { unsigned long flags; struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO); + __GFP_NOMEMALLOC | __GFP_NORETRY); if (!page) return NULL; diff --git a/mm/debug.c b/mm/debug.c index db1cd26d8752..5715448ab0b5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) "tlb_flush_pending %d\n" -#endif "def_flags: %#lx(%pGv)\n", mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, @@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) - mm->tlb_flush_pending, -#endif + atomic_read(&mm->tlb_flush_pending), mm->def_flags, &mm->def_flags ); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86975dec0ba1..216114f6ef0b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1496,6 +1496,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) } /* + * The page_table_lock above provides a memory barrier + * with change_protection_range. + */ + if (mm_tlb_flush_pending(vma->vm_mm)) + flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); + + /* * Migrate the THP to the requested node, returns with page unlocked * and access rights restored. */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bc48ee783dd9..31e207cb399b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4062,9 +4062,9 @@ out: return ret; out_release_unlock: spin_unlock(ptl); -out_release_nounlock: if (vm_shared) unlock_page(page); +out_release_nounlock: put_page(page); goto out; } @@ -4078,6 +4078,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr = *position; unsigned long remainder = *nr_pages; struct hstate *h = hstate_vma(vma); + int err = -EFAULT; while (vaddr < vma->vm_end && remainder) { pte_t *pte; @@ -4154,11 +4155,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { - int err = vm_fault_to_errno(ret, flags); - - if (err) - return err; - + err = vm_fault_to_errno(ret, flags); remainder = 0; break; } @@ -4213,7 +4210,7 @@ same_page: */ *position = vaddr; - return i ? i : -EFAULT; + return i ? i : err; } #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE diff --git a/mm/internal.h b/mm/internal.h index 24d88f084705..4ef49fc55e58 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); +void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { @@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void) static inline void try_to_unmap_flush_dirty(void) { } - +static inline void flush_tlb_batched_pending(struct mm_struct *mm) +{ +} #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 04bb1d3eb9ec..6bcfb01ba038 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -401,6 +401,7 @@ void kasan_report(unsigned long addr, size_t size, disable_trace_on_warning(); info.access_addr = (void *)addr; + info.first_bad_addr = (void *)addr; info.access_size = size; info.is_write = is_write; info.ip = ip; @@ -1038,7 +1038,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, goto out_unlock; if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) || - (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) { + (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || + mm_tlb_flush_pending(mm)) { pte_t entry; swapped = PageSwapCache(page); diff --git a/mm/madvise.c b/mm/madvise.c index 9976852f1e1c..47d8d8a25eae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, tlb_remove_check_page_size_change(tlb, PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; diff --git a/mm/memory.c b/mm/memory.c index 0e517be91a89..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,10 +271,14 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -398,6 +398,34 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. @@ -1197,6 +1225,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; diff --git a/mm/migrate.c b/mm/migrate.c index 627671551873..d68a41da6abb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, put_page(new_page); goto out_fail; } - /* - * We are not sure a pending tlb flush here is for a huge page - * mapping or not. Hence use the tlb range variant - */ - if (mm_tlb_flush_pending(mm)) - flush_tlb_range(vma, mmun_start, mmun_end); /* Prepare a page as a migration target */ __SetPageLocked(new_page); diff --git a/mm/mprotect.c b/mm/mprotect.c index 1a8c9ca83e48..bd0f409922cb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, atomic_read(&vma->vm_mm->mm_users) == 1) target_node = numa_node_id(); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { oldpte = *pte; @@ -243,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); - set_tlb_flush_pending(mm); + inc_tlb_flush_pending(mm); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -255,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, /* Only flush the TLB if we actually modified any entries: */ if (pages) flush_tlb_range(vma, start, end); - clear_tlb_flush_pending(mm); + dec_tlb_flush_pending(mm); return pages; } diff --git a/mm/mremap.c b/mm/mremap.c index cd8a1b199ef9..3f23715d3c69 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_ptl = pte_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, @@ -428,6 +429,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, struct vm_userfaultfd_ctx *uf, + struct list_head *uf_unmap_early, struct list_head *uf_unmap) { struct mm_struct *mm = current->mm; @@ -446,7 +448,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (addr + old_len > new_addr && new_addr + new_len > addr) goto out; - ret = do_munmap(mm, new_addr, new_len, NULL); + ret = do_munmap(mm, new_addr, new_len, uf_unmap_early); if (ret) goto out; @@ -514,6 +516,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long charged = 0; bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; + LIST_HEAD(uf_unmap_early); LIST_HEAD(uf_unmap); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) @@ -541,7 +544,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf, &uf_unmap); + &locked, &uf, &uf_unmap_early, &uf_unmap); goto out; } @@ -621,6 +624,7 @@ out: up_write(¤t->mm->mmap_sem); if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); + userfaultfd_unmap_complete(mm, &uf_unmap_early); mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); userfaultfd_unmap_complete(mm, &uf_unmap); return ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6d30e914afb6..6d00f746c2fd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4458,8 +4458,9 @@ long si_mem_available(void) * Part of the reclaimable slab consists of items that are in use, * and cannot be freed. Cap this estimate at the low watermark. */ - available += global_page_state(NR_SLAB_RECLAIMABLE) - - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + available += global_node_page_state(NR_SLAB_RECLAIMABLE) - + min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, + wmark_low); if (available < 0) available = 0; @@ -4602,8 +4603,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state(NR_UNSTABLE_NFS), - global_page_state(NR_SLAB_RECLAIMABLE), - global_page_state(NR_SLAB_UNRECLAIMABLE), + global_node_page_state(NR_SLAB_RECLAIMABLE), + global_node_page_state(NR_SLAB_UNRECLAIMABLE), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_page_state(NR_PAGETABLE), @@ -4891,9 +4892,11 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write, NUMA_ZONELIST_ORDER_LEN); user_zonelist_order = oldval; } else if (oldval != user_zonelist_order) { + mem_hotplug_begin(); mutex_lock(&zonelists_mutex); build_all_zonelists(NULL, NULL); mutex_unlock(&zonelists_mutex); + mem_hotplug_done(); } } out: @@ -7666,7 +7669,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { - pr_info("%s: [%lx, %lx) PFNs busy\n", + pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n", __func__, outer_start, end); ret = -EBUSY; goto done; diff --git a/mm/page_io.c b/mm/page_io.c index b6c4ac388209..5f61b54ee1f3 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -22,6 +22,7 @@ #include <linux/frontswap.h> #include <linux/blkdev.h> #include <linux/uio.h> +#include <linux/sched/task.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -136,6 +137,7 @@ out: WRITE_ONCE(bio->bi_private, NULL); bio_put(bio); wake_up_process(waiter); + put_task_struct(waiter); } int generic_swapfile_activate(struct swap_info_struct *sis, @@ -378,6 +380,11 @@ int swap_readpage(struct page *page, bool do_poll) goto out; } bdev = bio->bi_bdev; + /* + * Keep this task valid during swap readpage because the oom killer may + * attempt to access it in the page fault retry time check. + */ + get_task_struct(current); bio->bi_private = current; bio_set_op_attrs(bio, REQ_OP_READ, 0); count_vm_event(PSWPIN); diff --git a/mm/rmap.c b/mm/rmap.c index ced14f1af6dc..c1286d47aa1f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -605,6 +605,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) tlb_ubc->flush_required = true; /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. @@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but do not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page. If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and mumap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { @@ -852,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, .flags = PVMW_SYNC, }; int *cleaned = arg; + bool invalidation_needed = false; while (page_vma_mapped_walk(&pvmw)) { int ret = 0; - address = pvmw.address; if (pvmw.pte) { pte_t entry; pte_t *pte = pvmw.pte; @@ -863,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pte_dirty(*pte) && !pte_write(*pte)) continue; - flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + flush_cache_page(vma, pvmw.address, pte_pfn(*pte)); + entry = ptep_clear_flush(vma, pvmw.address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); - set_pte_at(vma->vm_mm, address, pte, entry); + set_pte_at(vma->vm_mm, pvmw.address, pte, entry); ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE @@ -877,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; - flush_cache_page(vma, address, page_to_pfn(page)); - entry = pmdp_huge_clear_flush(vma, address, pmd); + flush_cache_page(vma, pvmw.address, page_to_pfn(page)); + entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); - set_pmd_at(vma->vm_mm, address, pmd, entry); + set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry); ret = 1; #else /* unexpected pmd-mapped page? */ @@ -890,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, } if (ret) { - mmu_notifier_invalidate_page(vma->vm_mm, address); (*cleaned)++; + invalidation_needed = true; } } + if (invalidation_needed) { + mmu_notifier_invalidate_range(vma->vm_mm, address, + address + (1UL << compound_order(page))); + } + return true; } @@ -1287,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, }; pte_t pteval; struct page *subpage; - bool ret = true; + bool ret = true, invalidation_needed = false; enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ @@ -1327,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(!pvmw.pte, page); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - if (!(flags & TTU_IGNORE_ACCESS)) { - if (ptep_clear_flush_young_notify(vma, address, + if (ptep_clear_flush_young_notify(vma, pvmw.address, pvmw.pte)) { ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1340,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); + flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially @@ -1350,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ - pteval = ptep_get_and_clear(mm, address, pvmw.pte); + pteval = ptep_get_and_clear(mm, pvmw.address, + pvmw.pte); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); } else { - pteval = ptep_clear_flush(vma, address, pvmw.pte); + pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte); } /* Move the dirty bit to the page. Now the pte is gone. */ @@ -1369,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (PageHuge(page)) { int nr = 1 << compound_order(page); hugetlb_count_sub(nr, mm); - set_huge_swap_pte_at(mm, address, + set_huge_swap_pte_at(mm, pvmw.address, pvmw.pte, pteval, vma_mmu_pagesize(vma)); } else { dec_mm_counter(mm, mm_counter(page)); - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); } } else if (pte_unused(pteval)) { @@ -1398,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(subpage) }; pte_t swp_pte; @@ -1424,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If the page was redirtied, it cannot be * discarded. Remap the page to page table. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); SetPageSwapBacked(page); ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1432,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } if (swap_duplicate(entry) < 0) { - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; @@ -1448,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else dec_mm_counter(mm, mm_counter_file(page)); discard: page_remove_rmap(subpage, PageHuge(page)); put_page(page); - mmu_notifier_invalidate_page(mm, address); + invalidation_needed = true; } + + if (invalidation_needed) + mmu_notifier_invalidate_range(mm, address, + address + (1UL << compound_order(page))); return ret; } diff --git a/mm/shmem.c b/mm/shmem.c index b0aa6075d164..6540e5982444 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1022,7 +1022,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) */ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; @@ -1817,7 +1821,11 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo, * to shrink under memory pressure. */ spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; diff --git a/mm/util.c b/mm/util.c index 7b07ec852e01..9ecddf568fe3 100644 --- a/mm/util.c +++ b/mm/util.c @@ -633,7 +633,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += global_page_state(NR_SLAB_RECLAIMABLE); + free += global_node_page_state(NR_SLAB_RECLAIMABLE); /* * Leave reserved pages. The pages are not for anonymous pages. diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 013eea76685e..308acb9d814b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2453,7 +2453,6 @@ void zs_destroy_pool(struct zs_pool *pool) } destroy_cache(pool); - kfree(pool->size_class); kfree(pool->name); kfree(pool); } |