aboutsummaryrefslogtreecommitdiff
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorPeter Xu2022-05-12 20:22:55 -0700
committerAndrew Morton2022-05-13 07:20:11 -0700
commit05e90bd05eea33fc77d6b11e121e2da01fee379f (patch)
tree0a005eadeaff344f70c4eb7eb6625b0f86f3b362 /mm/hugetlb.c
parent60dfaad65aa97fb6755b9798a6b3c9e79bcd5930 (diff)
mm/hugetlb: only drop uffd-wp special pte if required
As with shmem uffd-wp special ptes, only drop the uffd-wp special swap pte if unmapping an entire vma or synchronized such that faults can not race with the unmap operation. This requires passing zap_flags all the way to the lowest level hugetlb unmap routine: __unmap_hugepage_range. In general, unmap calls originated in hugetlbfs code will pass the ZAP_FLAG_DROP_MARKER flag as synchronization is in place to prevent faults. The exception is hole punch which will first unmap without any synchronization. Later when hole punch actually removes the page from the file, it will check to see if there was a subsequent fault and if so take the hugetlb fault mutex while unmapping again. This second unmap will pass in ZAP_FLAG_DROP_MARKER. The justification of "whether to apply ZAP_FLAG_DROP_MARKER flag when unmap a hugetlb range" is (IMHO): we should never reach a state when a page fault could errornously fault in a page-cache page that was wr-protected to be writable, even in an extremely short period. That could happen if e.g. we pass ZAP_FLAG_DROP_MARKER when hugetlbfs_punch_hole() calls hugetlb_vmdelete_list(), because if a page faults after that call and before remove_inode_hugepages() is executed, the page cache can be mapped writable again in the small racy window, that can cause unexpected data overwritten. [peterx@redhat.com: fix sparse warning] Link: https://lkml.kernel.org/r/Ylcdw8I1L5iAoWhb@xz-m1.local [akpm@linux-foundation.org: move zap_flags_t from mm.h to mm_types.h to fix build issues] Link: https://lkml.kernel.org/r/20220405014915.14873-1-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: "Kirill A . Shutemov" <kirill@shutemov.name> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Nadav Amit <nadav.amit@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c33
1 files changed, 25 insertions, 8 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec9774ed84c0..99281aecbd28 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4973,7 +4973,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- struct page *ref_page)
+ struct page *ref_page, zap_flags_t zap_flags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
@@ -5029,7 +5029,18 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
* unmapped and its refcount is dropped, so just clear pte here.
*/
if (unlikely(!pte_present(pte))) {
- huge_pte_clear(mm, address, ptep, sz);
+ /*
+ * If the pte was wr-protected by uffd-wp in any of the
+ * swap forms, meanwhile the caller does not want to
+ * drop the uffd-wp bit in this zap, then replace the
+ * pte with a marker.
+ */
+ if (pte_swp_uffd_wp_any(pte) &&
+ !(zap_flags & ZAP_FLAG_DROP_MARKER))
+ set_huge_pte_at(mm, address, ptep,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ else
+ huge_pte_clear(mm, address, ptep, sz);
spin_unlock(ptl);
continue;
}
@@ -5057,7 +5068,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
if (huge_pte_dirty(pte))
set_page_dirty(page);
-
+ /* Leave a uffd-wp pte marker if needed */
+ if (huge_pte_uffd_wp(pte) &&
+ !(zap_flags & ZAP_FLAG_DROP_MARKER))
+ set_huge_pte_at(mm, address, ptep,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, vma, true);
@@ -5091,9 +5106,10 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page)
+ unsigned long end, struct page *ref_page,
+ zap_flags_t zap_flags)
{
- __unmap_hugepage_range(tlb, vma, start, end, ref_page);
+ __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
/*
* Clear this flag so that x86's huge_pmd_share page_table_shareable
@@ -5109,12 +5125,13 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page)
+ unsigned long end, struct page *ref_page,
+ zap_flags_t zap_flags)
{
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, vma->vm_mm);
- __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+ __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
tlb_finish_mmu(&tlb);
}
@@ -5169,7 +5186,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
unmap_hugepage_range(iter_vma, address,
- address + huge_page_size(h), page);
+ address + huge_page_size(h), page, 0);
}
i_mmap_unlock_write(mapping);
}