diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 109 |
1 files changed, 46 insertions, 63 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 6bc83060df9a..8e6dde68b389 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -41,6 +41,7 @@ #include <linux/swap_slots.h> #include <linux/sort.h> #include <linux/completion.h> +#include <linux/suspend.h> #include <asm/tlbflush.h> #include <linux/swapops.h> @@ -1219,6 +1220,13 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, } /* + * When we get a swap entry, if there aren't some other ways to + * prevent swapoff, such as the folio in swap cache is locked, page + * table lock is held, etc., the swap entry may become invalid because + * of swapoff. Then, we need to enclose all swap related functions + * with get_swap_device() and put_swap_device(), unless the swap + * functions call get/put_swap_device() by themselves. + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1227,9 +1235,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way - * to prevent swapoff, such as page lock, page table lock, etc. The - * caller must be prepared for that. For example, the following - * situation is possible. + * to prevent swapoff. The caller must be prepared for that. For + * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() @@ -1432,16 +1439,10 @@ void swapcache_free_entries(swp_entry_t *entries, int n) int __swap_count(swp_entry_t entry) { - struct swap_info_struct *si; + struct swap_info_struct *si = swp_swap_info(entry); pgoff_t offset = swp_offset(entry); - int count = 0; - si = get_swap_device(entry); - if (si) { - count = swap_count(si->swap_map[offset]); - put_swap_device(si); - } - return count; + return swap_count(si->swap_map[offset]); } /* @@ -1449,7 +1450,7 @@ int __swap_count(swp_entry_t entry) * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ -static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; @@ -1463,24 +1464,6 @@ static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) /* * How many references to @entry are currently swapped out? - * This does not give an exact answer when swap count is continued, - * but does include the high COUNT_CONTINUED flag to allow for that. - */ -int __swp_swapcount(swp_entry_t entry) -{ - int count = 0; - struct swap_info_struct *si; - - si = get_swap_device(entry); - if (si) { - count = swap_swapcount(si, entry); - put_swap_device(si); - } - return count; -} - -/* - * How many references to @entry are currently swapped out? * This considers COUNT_CONTINUED so it returns exact answer. */ int swp_swapcount(swp_entry_t entry) @@ -1762,7 +1745,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, struct page *page = folio_file_page(folio, swp_offset(entry)); struct page *swapcache; spinlock_t *ptl; - pte_t *pte, new_pte; + pte_t *pte, new_pte, old_pte; bool hwposioned = false; int ret = 1; @@ -1774,11 +1757,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, hwposioned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) { + if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), + swp_entry_to_pte(entry)))) { ret = 0; goto out; } + old_pte = ptep_get(pte); + if (unlikely(hwposioned || !PageUptodate(page))) { swp_entry_t swp_entry; @@ -1810,7 +1796,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, * call and have the page locked. */ VM_BUG_ON_PAGE(PageWriteback(page), page); - if (pte_swp_exclusive(*pte)) + if (pte_swp_exclusive(old_pte)) rmap_flags |= RMAP_EXCLUSIVE; page_add_anon_rmap(page, vma, addr, rmap_flags); @@ -1819,15 +1805,16 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, lru_cache_add_inactive_or_unevictable(page, vma); } new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); - if (pte_swp_soft_dirty(*pte)) + if (pte_swp_soft_dirty(old_pte)) new_pte = pte_mksoft_dirty(new_pte); - if (pte_swp_uffd_wp(*pte)) + if (pte_swp_uffd_wp(old_pte)) new_pte = pte_mkuffd_wp(new_pte); setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: - pte_unmap_unlock(pte, ptl); + if (pte) + pte_unmap_unlock(pte, ptl); if (page != swapcache) { unlock_page(page); put_page(page); @@ -1839,27 +1826,37 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned int type) { - swp_entry_t entry; - pte_t *pte; + pte_t *pte = NULL; struct swap_info_struct *si; - int ret = 0; si = swap_info[type]; - pte = pte_offset_map(pmd, addr); do { struct folio *folio; unsigned long offset; unsigned char swp_count; + swp_entry_t entry; + int ret; + pte_t ptent; + + if (!pte++) { + pte = pte_offset_map(pmd, addr); + if (!pte) + break; + } + + ptent = ptep_get_lockless(pte); - if (!is_swap_pte(*pte)) + if (!is_swap_pte(ptent)) continue; - entry = pte_to_swp_entry(*pte); + entry = pte_to_swp_entry(ptent); if (swp_type(entry) != type) continue; offset = swp_offset(entry); pte_unmap(pte); + pte = NULL; + folio = swap_cache_get_folio(entry, vma, addr); if (!folio) { struct page *page; @@ -1878,8 +1875,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (!folio) { swp_count = READ_ONCE(si->swap_map[offset]); if (swp_count == 0 || swp_count == SWAP_MAP_BAD) - goto try_next; - + continue; return -ENOMEM; } @@ -1889,20 +1885,17 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (ret < 0) { folio_unlock(folio); folio_put(folio); - goto out; + return ret; } folio_free_swap(folio); folio_unlock(folio); folio_put(folio); -try_next: - pte = pte_offset_map(pmd, addr); - } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap(pte - 1); + } while (addr += PAGE_SIZE, addr != end); - ret = 0; -out: - return ret; + if (pte) + pte_unmap(pte); + return 0; } static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, @@ -1917,8 +1910,6 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, do { cond_resched(); next = pmd_addr_end(addr, end); - if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - continue; ret = unuse_pte_range(vma, pmd, addr, next, type); if (ret) return ret; @@ -3288,9 +3279,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) unsigned char has_cache; int err; - p = get_swap_device(entry); - if (!p) - return -EINVAL; + p = swp_swap_info(entry); offset = swp_offset(entry); ci = lock_cluster_or_swap_info(p, offset); @@ -3337,7 +3326,6 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) unlock_out: unlock_cluster_or_swap_info(p, ci); - put_swap_device(p); return err; } @@ -3468,11 +3456,6 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) goto out; } - /* - * We are fortunate that although vmalloc_to_page uses pte_offset_map, - * no architecture is using highmem pages for kernel page tables: so it - * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps. - */ head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; |