Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--  mm/swapfile.c | 109
1 file changed, 46 insertions, 63 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6bc83060df9a..8e6dde68b389 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -41,6 +41,7 @@
#include <linux/swap_slots.h>
#include <linux/sort.h>
#include <linux/completion.h>
+#include <linux/suspend.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
@@ -1219,6 +1220,13 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
}
/*
+ * When we get a swap entry, if there aren't some other ways to
+ * prevent swapoff, such as the folio in swap cache is locked, page
+ * table lock is held, etc., the swap entry may become invalid because
+ * of swapoff. Then, we need to enclose all swap related functions
+ * with get_swap_device() and put_swap_device(), unless the swap
+ * functions call get/put_swap_device() by themselves.
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1227,9 +1235,8 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
* Notice that swapoff or swapoff+swapon can still happen before the
* percpu_ref_tryget_live() in get_swap_device() or after the
* percpu_ref_put() in put_swap_device() if there isn't any other way
- * to prevent swapoff, such as page lock, page table lock, etc. The
- * caller must be prepared for that. For example, the following
- * situation is possible.
+ * to prevent swapoff. The caller must be prepared for that. For
+ * example, the following situation is possible.
*
* CPU1 CPU2
* do_swap_page()
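
The convention this comment describes is exactly the one the next hunk removes from __swap_count(). For readers who want it in one place, here is a minimal sketch of the pin/unpin pattern; the helper name example_swap_count is hypothetical, everything else mirrors the code being deleted below:

/*
 * Illustrative only, not part of the patch: read a swap count while
 * pinning the swap device against swapoff, the way __swap_count()
 * did before this change.
 */
static int example_swap_count(swp_entry_t entry)
{
	struct swap_info_struct *si;
	int count = 0;

	si = get_swap_device(entry);	/* fails if swapoff is underway */
	if (si) {
		count = swap_count(si->swap_map[swp_offset(entry)]);
		put_swap_device(si);	/* let swapoff make progress again */
	}
	return count;
}
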
@@ -1432,16 +1439,10 @@ void swapcache_free_entries(swp_entry_t *entries, int n)
int __swap_count(swp_entry_t entry)
{
- struct swap_info_struct *si;
+ struct swap_info_struct *si = swp_swap_info(entry);
pgoff_t offset = swp_offset(entry);
- int count = 0;
- si = get_swap_device(entry);
- if (si) {
- count = swap_count(si->swap_map[offset]);
- put_swap_device(si);
- }
- return count;
+ return swap_count(si->swap_map[offset]);
}
/*
@@ -1449,7 +1450,7 @@ int __swap_count(swp_entry_t entry)
* This does not give an exact answer when swap count is continued,
* but does include the high COUNT_CONTINUED flag to allow for that.
*/
-static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
+int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
{
pgoff_t offset = swp_offset(entry);
struct swap_cluster_info *ci;
@@ -1463,24 +1464,6 @@ static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
/*
* How many references to @entry are currently swapped out?
- * This does not give an exact answer when swap count is continued,
- * but does include the high COUNT_CONTINUED flag to allow for that.
- */
-int __swp_swapcount(swp_entry_t entry)
-{
- int count = 0;
- struct swap_info_struct *si;
-
- si = get_swap_device(entry);
- if (si) {
- count = swap_swapcount(si, entry);
- put_swap_device(si);
- }
- return count;
-}
-
-/*
- * How many references to @entry are currently swapped out?
* This considers COUNT_CONTINUED so it returns exact answer.
*/
int swp_swapcount(swp_entry_t entry)
@@ -1762,7 +1745,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *page = folio_file_page(folio, swp_offset(entry));
struct page *swapcache;
spinlock_t *ptl;
- pte_t *pte, new_pte;
+ pte_t *pte, new_pte, old_pte;
bool hwposioned = false;
int ret = 1;
@@ -1774,11 +1757,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
hwposioned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
+ if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
+ swp_entry_to_pte(entry)))) {
ret = 0;
goto out;
}
+ old_pte = ptep_get(pte);
+
if (unlikely(hwposioned || !PageUptodate(page))) {
swp_entry_t swp_entry;
@@ -1810,7 +1796,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
* call and have the page locked.
*/
VM_BUG_ON_PAGE(PageWriteback(page), page);
- if (pte_swp_exclusive(*pte))
+ if (pte_swp_exclusive(old_pte))
rmap_flags |= RMAP_EXCLUSIVE;
page_add_anon_rmap(page, vma, addr, rmap_flags);
@@ -1819,15 +1805,16 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
lru_cache_add_inactive_or_unevictable(page, vma);
}
new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
- if (pte_swp_soft_dirty(*pte))
+ if (pte_swp_soft_dirty(old_pte))
new_pte = pte_mksoft_dirty(new_pte);
- if (pte_swp_uffd_wp(*pte))
+ if (pte_swp_uffd_wp(old_pte))
new_pte = pte_mkuffd_wp(new_pte);
setpte:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
swap_free(entry);
out:
- pte_unmap_unlock(pte, ptl);
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
if (page != swapcache) {
unlock_page(page);
put_page(page);
@@ -1839,27 +1826,37 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned int type)
{
- swp_entry_t entry;
- pte_t *pte;
+ pte_t *pte = NULL;
struct swap_info_struct *si;
- int ret = 0;
si = swap_info[type];
- pte = pte_offset_map(pmd, addr);
do {
struct folio *folio;
unsigned long offset;
unsigned char swp_count;
+ swp_entry_t entry;
+ int ret;
+ pte_t ptent;
+
+ if (!pte++) {
+ pte = pte_offset_map(pmd, addr);
+ if (!pte)
+ break;
+ }
+
+ ptent = ptep_get_lockless(pte);
- if (!is_swap_pte(*pte))
+ if (!is_swap_pte(ptent))
continue;
- entry = pte_to_swp_entry(*pte);
+ entry = pte_to_swp_entry(ptent);
if (swp_type(entry) != type)
continue;
offset = swp_offset(entry);
pte_unmap(pte);
+ pte = NULL;
+
folio = swap_cache_get_folio(entry, vma, addr);
if (!folio) {
struct page *page;
@@ -1878,8 +1875,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (!folio) {
swp_count = READ_ONCE(si->swap_map[offset]);
if (swp_count == 0 || swp_count == SWAP_MAP_BAD)
- goto try_next;
-
+ continue;
return -ENOMEM;
}
@@ -1889,20 +1885,17 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
- goto out;
+ return ret;
}
folio_free_swap(folio);
folio_unlock(folio);
folio_put(folio);
-try_next:
- pte = pte_offset_map(pmd, addr);
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap(pte - 1);
+ } while (addr += PAGE_SIZE, addr != end);
- ret = 0;
-out:
- return ret;
+ if (pte)
+ pte_unmap(pte);
+ return 0;
}
static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
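
A side note on the reworked loop above: pte_offset_map() can now fail, so the scan maps the PTE page lazily, reads entries with ptep_get_lockless(), and unmaps before the swap-cache lookup and read-in, which may sleep. A condensed, illustrative sketch of that shape follows; the helper name example_scan_ptes is hypothetical and the body is not part of the patch:

/*
 * Illustrative only: walk [addr, end), assumed to lie within a single
 * PMD, mapping the PTE page on demand and dropping the mapping before
 * any work that may sleep, as the reworked unuse_pte_range() does.
 */
static int example_scan_ptes(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte = NULL;

	do {
		pte_t ptent;

		if (!pte) {
			pte = pte_offset_map(pmd, addr);
			if (!pte)
				break;	/* PTE page went away: nothing to scan */
		}
		ptent = ptep_get_lockless(pte);
		if (!is_swap_pte(ptent)) {
			pte++;		/* stay mapped, step to the next entry */
			continue;
		}
		pte_unmap(pte);		/* unmap before work that may sleep */
		pte = NULL;		/* forces a remap at the new addr next time */
		/* ... look up and restore the swap entry here ... */
	} while (addr += PAGE_SIZE, addr != end);

	if (pte)
		pte_unmap(pte);
	return 0;
}
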
@@ -1917,8 +1910,6 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
do {
cond_resched();
next = pmd_addr_end(addr, end);
- if (pmd_none_or_trans_huge_or_clear_bad(pmd))
- continue;
ret = unuse_pte_range(vma, pmd, addr, next, type);
if (ret)
return ret;
@@ -3288,9 +3279,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
unsigned char has_cache;
int err;
- p = get_swap_device(entry);
- if (!p)
- return -EINVAL;
+ p = swp_swap_info(entry);
offset = swp_offset(entry);
ci = lock_cluster_or_swap_info(p, offset);
@@ -3337,7 +3326,6 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
unlock_out:
unlock_cluster_or_swap_info(p, ci);
- put_swap_device(p);
return err;
}
@@ -3468,11 +3456,6 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
goto out;
}
- /*
- * We are fortunate that although vmalloc_to_page uses pte_offset_map,
- * no architecture is using highmem pages for kernel page tables: so it
- * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
- */
head = vmalloc_to_page(si->swap_map + offset);
offset &= ~PAGE_MASK;