aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorJohn Hubbard2020-04-01 21:05:33 -0700
committerLinus Torvalds2020-04-02 09:35:27 -0700
commit47e29d32afba11b13efb51f03154a8cf22fb4360 (patch)
tree9d72042c87bfd951e56cf680c0372ed347240300 /mm
parent3faa52c03f440d1b9ddef18c4f189f4790d52d7e (diff)
mm/gup: page->hpage_pinned_refcount: exact pin counts for huge pages
For huge pages (and in fact, any compound page), the GUP_PIN_COUNTING_BIAS scheme tends to overflow too easily, each tail page increments the head page->_refcount by GUP_PIN_COUNTING_BIAS (1024). That limits the number of huge pages that can be pinned. This patch removes that limitation, by using an exact form of pin counting for compound pages of order > 1. The "order > 1" is required because this approach uses the 3rd struct page in the compound page, and order 1 compound pages only have two pages, so that won't work there. A new struct page field, hpage_pinned_refcount, has been added, replacing a padding field in the union (so no new space is used). This enhancement also has a useful side effect: huge pages and compound pages (of order > 1) do not suffer from the "potential false positives" problem that is discussed in the page_dma_pinned() comment block. That is because these compound pages have extra space for tracking things, so they get exact pin counts instead of overloading page->_refcount. Documentation/core-api/pin_user_pages.rst is updated accordingly. Suggested-by: Jan Kara <jack@suse.cz> Signed-off-by: John Hubbard <jhubbard@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Jan Kara <jack@suse.cz> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jérôme Glisse <jglisse@redhat.com> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Link: http://lkml.kernel.org/r/20200211001536.1027652-8-jhubbard@nvidia.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c78
-rw-r--r--mm/hugetlb.c6
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/rmap.c6
4 files changed, 84 insertions, 8 deletions
diff --git a/mm/gup.c b/mm/gup.c
index ee4f14f108fe..6601df4c7682 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,6 +29,22 @@ struct follow_page_context {
unsigned int page_mask;
};
+static void hpage_pincount_add(struct page *page, int refs)
+{
+ VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
+ VM_BUG_ON_PAGE(page != compound_head(page), page);
+
+ atomic_add(refs, compound_pincount_ptr(page));
+}
+
+static void hpage_pincount_sub(struct page *page, int refs)
+{
+ VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
+ VM_BUG_ON_PAGE(page != compound_head(page), page);
+
+ atomic_sub(refs, compound_pincount_ptr(page));
+}
+
/*
* Return the compound head page with ref appropriately incremented,
* or NULL if that failed.
@@ -70,8 +86,25 @@ static __maybe_unused struct page *try_grab_compound_head(struct page *page,
if (flags & FOLL_GET)
return try_get_compound_head(page, refs);
else if (flags & FOLL_PIN) {
- refs *= GUP_PIN_COUNTING_BIAS;
- return try_get_compound_head(page, refs);
+ /*
+ * When pinning a compound page of order > 1 (which is what
+ * hpage_pincount_available() checks for), use an exact count to
+ * track it, via hpage_pincount_add/_sub().
+ *
+ * However, be sure to *also* increment the normal page refcount
+ * field at least once, so that the page really is pinned.
+ */
+ if (!hpage_pincount_available(page))
+ refs *= GUP_PIN_COUNTING_BIAS;
+
+ page = try_get_compound_head(page, refs);
+ if (!page)
+ return NULL;
+
+ if (hpage_pincount_available(page))
+ hpage_pincount_add(page, refs);
+
+ return page;
}
WARN_ON_ONCE(1);
@@ -106,12 +139,25 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
if (flags & FOLL_GET)
return try_get_page(page);
else if (flags & FOLL_PIN) {
+ int refs = 1;
+
page = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(page) <= 0))
return false;
- page_ref_add(page, GUP_PIN_COUNTING_BIAS);
+ if (hpage_pincount_available(page))
+ hpage_pincount_add(page, 1);
+ else
+ refs = GUP_PIN_COUNTING_BIAS;
+
+ /*
+ * Similar to try_grab_compound_head(): even if using the
+ * hpage_pincount_add/_sub() routines, be sure to
+ * *also* increment the normal page refcount field at least
+ * once, so that the page really is pinned.
+ */
+ page_ref_add(page, refs);
}
return true;
@@ -120,12 +166,17 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
#ifdef CONFIG_DEV_PAGEMAP_OPS
static bool __unpin_devmap_managed_user_page(struct page *page)
{
- int count;
+ int count, refs = 1;
if (!page_is_devmap_managed(page))
return false;
- count = page_ref_sub_return(page, GUP_PIN_COUNTING_BIAS);
+ if (hpage_pincount_available(page))
+ hpage_pincount_sub(page, 1);
+ else
+ refs = GUP_PIN_COUNTING_BIAS;
+
+ count = page_ref_sub_return(page, refs);
/*
* devmap page refcounts are 1-based, rather than 0-based: if
@@ -157,6 +208,8 @@ static bool __unpin_devmap_managed_user_page(struct page *page)
*/
void unpin_user_page(struct page *page)
{
+ int refs = 1;
+
page = compound_head(page);
/*
@@ -168,7 +221,12 @@ void unpin_user_page(struct page *page)
if (__unpin_devmap_managed_user_page(page))
return;
- if (page_ref_sub_and_test(page, GUP_PIN_COUNTING_BIAS))
+ if (hpage_pincount_available(page))
+ hpage_pincount_sub(page, 1);
+ else
+ refs = GUP_PIN_COUNTING_BIAS;
+
+ if (page_ref_sub_and_test(page, refs))
__put_page(page);
}
EXPORT_SYMBOL(unpin_user_page);
@@ -1955,8 +2013,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
static void put_compound_head(struct page *page, int refs, unsigned int flags)
{
- if (flags & FOLL_PIN)
- refs *= GUP_PIN_COUNTING_BIAS;
+ if (flags & FOLL_PIN) {
+ if (hpage_pincount_available(page))
+ hpage_pincount_sub(page, refs);
+ else
+ refs *= GUP_PIN_COUNTING_BIAS;
+ }
VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ba1de6bc1402..3d31a235b53d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1009,6 +1009,9 @@ static void destroy_compound_gigantic_page(struct page *page,
struct page *p = page + 1;
atomic_set(compound_mapcount_ptr(page), 0);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
clear_compound_head(p);
set_page_refcounted(p);
@@ -1287,6 +1290,9 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
set_compound_head(p, page);
}
atomic_set(compound_mapcount_ptr(page), -1);
+
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
}
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e7e9c1d6caa..8f3a3bf2c347 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -688,6 +688,8 @@ void prep_compound_page(struct page *page, unsigned int order)
set_compound_head(p, page);
}
atomic_set(compound_mapcount_ptr(page), -1);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/mm/rmap.c b/mm/rmap.c
index b3e381919835..e45b9b991e2f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1178,6 +1178,9 @@ void page_add_new_anon_rmap(struct page *page,
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
__inc_node_page_state(page, NR_ANON_THPS);
} else {
/* Anon THP always mapped first with PMD */
@@ -1974,6 +1977,9 @@ void hugepage_add_new_anon_rmap(struct page *page,
{
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
atomic_set(compound_mapcount_ptr(page), 0);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
__page_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */