From 08d5b29eac7dd5e6c79b66d390ecbb9219e05931 Mon Sep 17 00:00:00 2001
From: Pasha Tatashin
Date: Fri, 14 Jan 2022 14:06:33 -0800
Subject: mm: ptep_clear() page table helper

We have ptep_get_and_clear() and ptep_get_and_clear_full() helpers to
clear PTE from user page tables, but there is no variant for simple
clear of a present PTE from user page tables without using a low level
pte_clear() which can be either native or para-virtualised.

Add a new ptep_clear() that can be used in common code to clear PTEs
from page table.  We will need this call later in order to add a hook
for page table check.

Link: https://lkml.kernel.org/r/20211221154650.1047963-3-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Greg Thelen <gthelen@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'mm/khugepaged.c')

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e99101162f1a..9d40dd8890e5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -756,11 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 				 * ptl mostly unnecessary.
 				 */
 				spin_lock(ptl);
-				/*
-				 * paravirt calls inside pte_clear here are
-				 * superfluous.
-				 */
-				pte_clear(vma->vm_mm, address, _pte);
+				ptep_clear(vma->vm_mm, address, _pte);
 				spin_unlock(ptl);
 			}
 		} else {
@@ -774,11 +770,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			 * inside page_remove_rmap().
 			 */
 			spin_lock(ptl);
-			/*
-			 * paravirt calls inside pte_clear here are
-			 * superfluous.
-			 */
-			pte_clear(vma->vm_mm, address, _pte);
+			ptep_clear(vma->vm_mm, address, _pte);
 			page_remove_rmap(src_page, false);
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
-- 
cgit v1.2.3


From 020e87650af9f43683546729f959fdc78422a4b7 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox (Oracle)
Date: Fri, 14 Jan 2022 14:06:44 -0800
Subject: mm: remove last argument of reuse_swap_page()

None of the callers care about the total_map_swapcount() any more.

Link: https://lkml.kernel.org/r/20211220205943.456187-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 +++---
 mm/huge_memory.c     | 2 +-
 mm/khugepaged.c      | 2 +-
 mm/memory.c          | 2 +-
 mm/swapfile.c        | 8 +-------
 5 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'mm/khugepaged.c')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d1ea44b31f19..bdccbf1efa61 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -514,7 +514,7 @@ extern int __swp_swapcount(swp_entry_t entry);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
 extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
-extern bool reuse_swap_page(struct page *, int *);
+extern bool reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
@@ -680,8 +680,8 @@ static inline int swp_swapcount(swp_entry_t entry)
 	return 0;
 }
 
-#define reuse_swap_page(page, total_map_swapcount) \
-	(page_trans_huge_mapcount(page, total_map_swapcount) == 1)
+#define reuse_swap_page(page) \
+	(page_trans_huge_mapcount(page, NULL) == 1)
 
 static inline int try_to_free_swap(struct page *page)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5483347291c..b61fbe95c856 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1322,7 +1322,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
 	 * We can only reuse the page if nobody else maps the huge page or it's
 	 * part.
 	 */
-	if (reuse_swap_page(page, NULL)) {
+	if (reuse_swap_page(page)) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9d40dd8890e5..698ea19775ac 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -681,7 +681,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 			goto out;
 		}
 		if (!pte_write(pteval) && PageSwapCache(page) &&
-				!reuse_swap_page(page, NULL)) {
+				!reuse_swap_page(page)) {
 			/*
 			 * Page is in the swap cache and cannot be re-used.
 			 * It cannot be collapsed into a THP.
diff --git a/mm/memory.c b/mm/memory.c
index 5fea331b1560..571d02f419ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3627,7 +3627,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e64207e2ef1d..31d13a393cf0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1668,12 +1668,8 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
  * to it.  And as a side-effect, free up its swap: because the old content
  * on disk will never be read, and seeking back there to write new content
  * later would only waste time away from clustering.
- *
- * NOTE: total_map_swapcount should not be relied upon by the caller if
- * reuse_swap_page() returns false, but it may be always overwritten
- * (see the other implementation for CONFIG_SWAP=n).
  */
-bool reuse_swap_page(struct page *page, int *total_map_swapcount)
+bool reuse_swap_page(struct page *page)
 {
 	int count, total_mapcount, total_swapcount;
 
@@ -1682,8 +1678,6 @@ bool reuse_swap_page(struct page *page, int *total_map_swapcount)
 		return false;
 	count = page_trans_huge_map_swapcount(page, &total_mapcount,
 					      &total_swapcount);
-	if (total_map_swapcount)
-		*total_map_swapcount = total_mapcount + total_swapcount;
 	if (count == 1 && PageSwapCache(page) &&
 	    (likely(!PageTransCompound(page)) ||
 	     /* The remaining swap count will be freed soon */
-- 
cgit v1.2.3


From e9ea874a8ffb0f8ebed4f4981531a32c5b663d79 Mon Sep 17 00:00:00 2001
From: Yang Yang
Date: Fri, 14 Jan 2022 14:07:55 -0800
Subject: mm/vmstat: add events for THP max_ptes_* exceeds

There are interfaces to adjust max_ptes_none, max_ptes_swap,
max_ptes_shared values, see
  /sys/kernel/mm/transparent_hugepage/khugepaged/.

But system administrator may not know which value is the best.  So Add
those events to support adjusting max_ptes_* to suitable values.

For example, if default max_ptes_swap value causes too much failures,
and system uses zram whose IO is fast, administrator could increase
max_ptes_swap until THP_SCAN_EXCEED_SWAP_PTE not increase anymore.

Link: https://lkml.kernel.org/r/20211225094036.574157-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Saravanan D <saravanand@fb.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 3 +++
 mm/khugepaged.c               | 7 +++++++
 mm/vmstat.c                   | 3 +++
 3 files changed, 13 insertions(+)

(limited to 'mm/khugepaged.c')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a185cc75ff52..7b2363388bfa 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -98,6 +98,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+		THP_SCAN_EXCEED_NONE_PTE,
+		THP_SCAN_EXCEED_SWAP_PTE,
+		THP_SCAN_EXCEED_SHARED_PTE,
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 		THP_SPLIT_PUD,
 #endif
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 698ea19775ac..02071f213c58 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -618,6 +618,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
+				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 				goto out;
 			}
 		}
@@ -636,6 +637,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (page_mapcount(page) > 1 &&
 				++shared > khugepaged_max_ptes_shared) {
 			result = SCAN_EXCEED_SHARED_PTE;
+			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 			goto out;
 		}
 
@@ -1253,6 +1255,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 				continue;
 			} else {
 				result = SCAN_EXCEED_SWAP_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				goto out_unmap;
 			}
 		}
@@ -1262,6 +1265,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
+				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 				goto out_unmap;
 			}
 		}
@@ -1290,6 +1294,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		if (page_mapcount(page) > 1 &&
 				++shared > khugepaged_max_ptes_shared) {
 			result = SCAN_EXCEED_SHARED_PTE;
+			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 			goto out_unmap;
 		}
 
@@ -2000,6 +2005,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 		if (xa_is_value(page)) {
 			if (++swap > khugepaged_max_ptes_swap) {
 				result = SCAN_EXCEED_SWAP_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				break;
 			}
 			continue;
@@ -2046,6 +2052,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 	if (result == SCAN_SUCCEED) {
 		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
 			result = SCAN_EXCEED_NONE_PTE;
+			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
 			node = khugepaged_find_target_node();
 			collapse_file(mm, file, start, hpage, node);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d701c335628c..4057372745d0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1353,6 +1353,9 @@ const char * const vmstat_text[] = {
 	"thp_split_page_failed",
 	"thp_deferred_split_page",
 	"thp_split_pmd",
+	"thp_scan_exceed_none_pte",
+	"thp_scan_exceed_swap_pte",
+	"thp_scan_exceed_share_pte",
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 	"thp_split_pud",
 #endif
-- 
cgit v1.2.3


From 0b8f0d870020dbd7037bfacbb73a9b3213470f90 Mon Sep 17 00:00:00 2001
From: Quanfa Fu
Date: Fri, 14 Jan 2022 14:09:25 -0800
Subject: mm: fix some comment errors

Link: https://lkml.kernel.org/r/20211101040208.460810-1-fuqf0919@gmail.com
Signed-off-by: Quanfa Fu <fuqf0919@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c     | 2 +-
 mm/memory-failure.c | 2 +-
 mm/slab_common.c    | 2 +-
 mm/swap.c           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'mm/khugepaged.c')

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 02071f213c58..7af84bac6fc2 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1303,7 +1303,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		/*
 		 * Record which node the original page is from and save this
 		 * information to khugepaged_node_load[].
-		 * Khupaged will allocate hugepage from the node has the max
+		 * Khugepaged will allocate hugepage from the node has the max
 		 * hit record.
 		 */
 		node = page_to_nid(page);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6a2b4b86b679..373837bb94cb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1306,7 +1306,7 @@ static int __get_unpoison_page(struct page *page)
  *
  * get_hwpoison_page() takes a page refcount of an error page to handle memory
  * error on it, after checking that the error page is in a well-defined state
- * (defined as a page-type we can successfully handle the memor error on it,
+ * (defined as a page-type we can successfully handle the memory error on it,
  * such as LRU page and hugetlb page).
  *
  * Memory error handling could be triggered at any time on any type of page,
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1f75bd4e95d6..9513244457e6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -819,7 +819,7 @@ void __init setup_kmalloc_cache_index_table(void)
 
 	if (KMALLOC_MIN_SIZE >= 64) {
 		/*
-		 * The 96 byte size cache is not used if the alignment
+		 * The 96 byte sized cache is not used if the alignment
 		 * is 64 byte.
 		 */
 		for (i = 64 + 8; i <= 96; i += 8)
diff --git a/mm/swap.c b/mm/swap.c
index e8c9dc6d0377..b461814ce0cb 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -882,7 +882,7 @@ void lru_cache_disable(void)
 	 * all online CPUs so any calls of lru_cache_disabled wrapped by
 	 * local_lock or preemption disabled would be ordered by that.
 	 * The atomic operation doesn't need to have stronger ordering
-	 * requirements because that is enforeced by the scheduling
+	 * requirements because that is enforced by the scheduling
 	 * guarantees.
 	 */
 	__lru_add_drain_all(true);
-- 
cgit v1.2.3