aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
author Andrea Arcangeli 2014-01-21 15:48:54 -0800
committer Linus Torvalds 2014-01-21 16:19:43 -0800
commit 44518d2b32646e37b4b7a0813bbbe98dc21c7f8f (patch)
tree 9ab4e057cb7ce8ed5cb6a5f6085c902b8ba6f109 /mm
parent ca641514f4056deee1fb2eb356e2c99b98718ade (diff)
mm: tail page refcounting optimization for slab and hugetlbfs
This skips the _mapcount mangling for slab and hugetlbfs pages. The main difficulty in doing this is guaranteeing that PageSlab and PageHeadHuge remain constant across all get_page/put_page calls run on the tail of slab or hugetlbfs compound pages. Otherwise, if they are set during get_page but not set during put_page, the _mapcount of the tail page would underflow. PageHeadHuge will remain true until the compound page is released and enters the buddy allocator, so it is not at risk of changing even if the tail pin is the last reference left on the page. PG_slab, by contrast, is cleared before the slab code frees the head page with put_page, so if the tail pin were released after the slab code freed the page, we would have a problem. But in the slab case the tail pin cannot be the last reference left on the page. This is because the slab code is free to reuse the compound page after a kfree/kmem_cache_free without having to check whether any tail pin is left. Consequently, all tail pins must always be released while the head is still pinned by the slab code, and so we know PG_slab will still be set too. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Reviewed-by: Khalid Aziz <khalid.aziz@oracle.com> Cc: Pravin Shelar <pshelar@nicira.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Ben Hutchings <bhutchings@solarflare.com> Cc: Christoph Lameter <cl@linux.com> Cc: Johannes Weiner <jweiner@redhat.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/internal.h 3
-rw-r--r-- mm/swap.c 33
2 files changed, 29 insertions, 7 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 684f7aa9692a..a85a3ab1f7ef 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page,
VM_BUG_ON(page_mapcount(page) < 0);
if (get_page_head)
atomic_inc(&page->first_page->_count);
- atomic_inc(&page->_mapcount);
+ if (compound_tail_refcounted(page->first_page))
+ atomic_inc(&page->_mapcount);
}
/*
diff --git a/mm/swap.c b/mm/swap.c
index e2757fbb04ea..bba4aa5bf686 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page)
/*
* THP can not break up slab pages so avoid taking
- * compound_lock(). Slab performs non-atomic bit ops
- * on page->flags for better performance. In
+ * compound_lock() and skip the tail page refcounting
+ * (in _mapcount) too. Slab performs non-atomic bit
+ * ops on page->flags for better performance. In
* particular slab_unlock() in slub used to be a hot
* path. It is still hot on arches that do not support
* this_cpu_cmpxchg_double().
@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page)
* PageTail clear after smp_rmb() and we'll treat it
* as a single page.
*/
- if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+ if (!__compound_tail_refcounted(page_head)) {
/*
* If "page" is a THP tail, we must read the tail page
* flags after the head page flags. The
@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page)
* cannot race here.
*/
VM_BUG_ON(!PageHead(page_head));
- VM_BUG_ON(page_mapcount(page) <= 0);
- atomic_dec(&page->_mapcount);
- if (put_page_testzero(page_head))
+ VM_BUG_ON(page_mapcount(page) != 0);
+ if (put_page_testzero(page_head)) {
+ /*
+ * If this is the tail of a
+ * slab compound page, the
+ * tail pin must not be the
+ * last reference held on the
+ * page, because the PG_slab
+ * cannot be cleared before
+ * all tail pins (which skips
+ * the _mapcount tail
+ * refcounting) have been
+ * released. For hugetlbfs the
+ * tail pin may be the last
+ * reference on the page
+ * instead, because
+ * PageHeadHuge will not go
+ * away until the compound
+ * page enters the buddy
+ * allocator.
+ */
+ VM_BUG_ON(PageSlab(page_head));
__put_compound_page(page_head);
+ }
return;
} else
/*