aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/debug.c4
-rw-r--r--mm/gup.c3
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/kasan/Makefile3
-rw-r--r--mm/kasan/common.c29
-rw-r--r--mm/kasan/tags.c2
-rw-r--r--mm/kmemleak.c10
-rw-r--r--mm/memblock.c11
-rw-r--r--mm/memory-failure.c3
-rw-r--r--mm/memory_hotplug.c85
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/migrate.c12
-rw-r--r--mm/oom_kill.c12
-rw-r--r--mm/page_alloc.c32
-rw-r--r--mm/page_ext.c4
-rw-r--r--mm/shmem.c10
-rw-r--r--mm/slab.c15
-rw-r--r--mm/slab.h7
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slub.c59
-rw-r--r--mm/swap.c17
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmscan.c10
23 files changed, 194 insertions, 148 deletions
diff --git a/mm/debug.c b/mm/debug.c
index 0abb987dad9b..1611cf00a137 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -44,7 +44,7 @@ const struct trace_print_flags vmaflag_names[] = {
void __dump_page(struct page *page, const char *reason)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping;
bool page_poisoned = PagePoisoned(page);
int mapcount;
@@ -58,6 +58,8 @@ void __dump_page(struct page *page, const char *reason)
goto hex_only;
}
+ mapping = page_mapping(page);
+
/*
* Avoid VM_BUG_ON() in page_mapcount().
* page->_mapcount space in struct page is used by sl[aou]b pages to
diff --git a/mm/gup.c b/mm/gup.c
index 05acd7e2eb22..75029649baca 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (!pmd_present(pmd))
return 0;
- if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+ if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+ pmd_devmap(pmd))) {
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df2e7dd5ff17..afef61656c1e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
if (ret & VM_FAULT_RETRY) {
- if (nonblocking)
+ if (nonblocking &&
+ !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*nonblocking = 0;
*nr_pages = 0;
/*
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 0a14fcff70ed..5d1065efbd47 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -5,7 +5,10 @@ UBSAN_SANITIZE_generic.o := n
UBSAN_SANITIZE_tags.o := n
KCOV_INSTRUMENT := n
+CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_generic.o = -pg
+CFLAGS_REMOVE_tags.o = -pg
+
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 73c9cbfdedf4..09b534fbba17 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -361,10 +361,15 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
* get different tags.
*/
static u8 assign_tag(struct kmem_cache *cache, const void *object,
- bool init, bool krealloc)
+ bool init, bool keep_tag)
{
- /* Reuse the same tag for krealloc'ed objects. */
- if (krealloc)
+ /*
+ * 1. When an object is kmalloc()'ed, two hooks are called:
+ * kasan_slab_alloc() and kasan_kmalloc(). We assign the
+ * tag only in the first one.
+ * 2. We reuse the same tag for krealloc'ed objects.
+ */
+ if (keep_tag)
return get_tag(object);
/*
@@ -405,12 +410,6 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
return (void *)object;
}
-void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
- gfp_t flags)
-{
- return kasan_kmalloc(cache, object, cache->object_size, flags);
-}
-
static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
{
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
@@ -467,7 +466,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
}
static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
- size_t size, gfp_t flags, bool krealloc)
+ size_t size, gfp_t flags, bool keep_tag)
{
unsigned long redzone_start;
unsigned long redzone_end;
@@ -485,7 +484,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
KASAN_SHADOW_SCALE_SIZE);
if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
- tag = assign_tag(cache, object, false, krealloc);
+ tag = assign_tag(cache, object, false, keep_tag);
/* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
kasan_unpoison_shadow(set_tag(object, tag), size);
@@ -498,10 +497,16 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
return set_tag(object, tag);
}
+void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
+ gfp_t flags)
+{
+ return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
+}
+
void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
size_t size, gfp_t flags)
{
- return __kasan_kmalloc(cache, object, size, flags, false);
+ return __kasan_kmalloc(cache, object, size, flags, true);
}
EXPORT_SYMBOL(kasan_kmalloc);
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 0777649e07c4..63fca3172659 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -46,7 +46,7 @@ void kasan_init_tags(void)
int cpu;
for_each_possible_cpu(cpu)
- per_cpu(prng_state, cpu) = get_random_u32();
+ per_cpu(prng_state, cpu) = (u32)get_cycles();
}
/*
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f9d9dc250428..707fa5579f66 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -574,6 +574,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
unsigned long flags;
struct kmemleak_object *object, *parent;
struct rb_node **link, *rb_parent;
+ unsigned long untagged_ptr;
object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
if (!object) {
@@ -619,8 +620,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
write_lock_irqsave(&kmemleak_lock, flags);
- min_addr = min(min_addr, ptr);
- max_addr = max(max_addr, ptr + size);
+ untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
+ min_addr = min(min_addr, untagged_ptr);
+ max_addr = max(max_addr, untagged_ptr + size);
link = &object_tree_root.rb_node;
rb_parent = NULL;
while (*link) {
@@ -1333,6 +1335,7 @@ static void scan_block(void *_start, void *_end,
unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
unsigned long *end = _end - (BYTES_PER_POINTER - 1);
unsigned long flags;
+ unsigned long untagged_ptr;
read_lock_irqsave(&kmemleak_lock, flags);
for (ptr = start; ptr < end; ptr++) {
@@ -1347,7 +1350,8 @@ static void scan_block(void *_start, void *_end,
pointer = *ptr;
kasan_enable_current();
- if (pointer < min_addr || pointer >= max_addr)
+ untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+ if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
continue;
/*
diff --git a/mm/memblock.c b/mm/memblock.c
index 022d4cbb3618..ea31045ba704 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -26,6 +26,13 @@
#include "internal.h"
+#define INIT_MEMBLOCK_REGIONS 128
+#define INIT_PHYSMEM_REGIONS 4
+
+#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
+# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
+#endif
+
/**
* DOC: memblock overview
*
@@ -92,7 +99,7 @@ unsigned long max_pfn;
unsigned long long max_possible_pfn;
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif
@@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = {
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
- .reserved.max = INIT_MEMBLOCK_REGIONS,
+ .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7c72f2a95785..831be5ff5f4d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
if (fail || tk->addr_valid == 0) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
- force_sig(SIGKILL, tk->tsk);
+ do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
+ tk->tsk, PIDTYPE_PID);
}
/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b9a667d36c55..1ad28323fb9f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1188,11 +1188,13 @@ static inline int pageblock_free(struct page *page)
return PageBuddy(page) && page_order(page) >= pageblock_order;
}
-/* Return the start of the next active pageblock after a given page */
-static struct page *next_active_pageblock(struct page *page)
+/* Return the pfn of the start of the next active pageblock after a given pfn */
+static unsigned long next_active_pageblock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
+
/* Ensure the starting page is pageblock-aligned */
- BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+ BUG_ON(pfn & (pageblock_nr_pages - 1));
/* If the entire pageblock is free, move to the end of free page */
if (pageblock_free(page)) {
@@ -1200,16 +1202,16 @@ static struct page *next_active_pageblock(struct page *page)
/* be careful. we don't have locks, page_order can be changed.*/
order = page_order(page);
if ((order < MAX_ORDER) && (order >= pageblock_order))
- return page + (1 << order);
+ return pfn + (1 << order);
}
- return page + pageblock_nr_pages;
+ return pfn + pageblock_nr_pages;
}
-static bool is_pageblock_removable_nolock(struct page *page)
+static bool is_pageblock_removable_nolock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
struct zone *zone;
- unsigned long pfn;
/*
* We have to be careful here because we are iterating over memory
@@ -1232,12 +1234,14 @@ static bool is_pageblock_removable_nolock(struct page *page)
/* Checks if this range of memory is likely to be hot-removable. */
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
- struct page *page = pfn_to_page(start_pfn);
- struct page *end_page = page + nr_pages;
+ unsigned long end_pfn, pfn;
+
+ end_pfn = min(start_pfn + nr_pages,
+ zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
/* Check the starting page of each pageblock within the range */
- for (; page < end_page; page = next_active_pageblock(page)) {
- if (!is_pageblock_removable_nolock(page))
+ for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
+ if (!is_pageblock_removable_nolock(pfn))
return false;
cond_resched();
}
@@ -1273,6 +1277,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
i++;
if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
continue;
+ /* Check if we got outside of the zone */
+ if (zone && !zone_spans_pfn(zone, pfn + i))
+ return 0;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
return 0;
@@ -1301,23 +1308,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
unsigned long pfn;
- struct page *page;
+
for (pfn = start; pfn < end; pfn++) {
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (PageLRU(page))
- return pfn;
- if (__PageMovable(page))
- return pfn;
- if (PageHuge(page)) {
- if (hugepage_migration_supported(page_hstate(page)) &&
- page_huge_active(page))
- return pfn;
- else
- pfn = round_up(pfn + 1,
- 1 << compound_order(page)) - 1;
- }
- }
+ struct page *page, *head;
+ unsigned long skip;
+
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageLRU(page))
+ return pfn;
+ if (__PageMovable(page))
+ return pfn;
+
+ if (!PageHuge(page))
+ continue;
+ head = compound_head(page);
+ if (hugepage_migration_supported(page_hstate(head)) &&
+ page_huge_active(head))
+ return pfn;
+ skip = (1 << compound_order(head)) - (page - head);
+ pfn += skip - 1;
}
return 0;
}
@@ -1344,7 +1355,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
struct page *page;
- int not_managed = 0;
int ret = 0;
LIST_HEAD(source);
@@ -1392,7 +1402,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
else
ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
if (!ret) { /* Success */
- put_page(page);
list_add_tail(&page->lru, &source);
if (!__PageMovable(page))
inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1401,22 +1410,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
} else {
pr_warn("failed to isolate pfn %lx\n", pfn);
dump_page(page, "isolation failed");
- put_page(page);
- /* Because we don't have big zone->lock. we should
- check this again here. */
- if (page_count(page)) {
- not_managed++;
- ret = -EBUSY;
- break;
- }
}
+ put_page(page);
}
if (!list_empty(&source)) {
- if (not_managed) {
- putback_movable_pages(&source);
- goto out;
- }
-
/* Allocate a new page from the nearest neighbor node */
ret = migrate_pages(&source, new_node_page, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1429,7 +1426,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
putback_movable_pages(&source);
}
}
-out:
+
return ret;
}
@@ -1576,7 +1573,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
we assume this for now. .*/
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) {
- mem_hotplug_done();
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
@@ -1591,7 +1587,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
if (ret) {
- mem_hotplug_done();
reason = "failure to isolate range";
goto failed_removal;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d4496d9d34f5..ee2bce59d2bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1314,7 +1314,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
nodemask_t *nodes)
{
unsigned long copy = ALIGN(maxnode-1, 64) / 8;
- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
if (copy > nbytes) {
if (copy > PAGE_SIZE)
@@ -1491,7 +1491,7 @@ static int kernel_get_mempolicy(int __user *policy,
int uninitialized_var(pval);
nodemask_t nodes;
- if (nmask != NULL && maxnode < MAX_NUMNODES)
+ if (nmask != NULL && maxnode < nr_node_ids)
return -EINVAL;
err = do_get_mempolicy(&pval, &nodes, addr, flags);
@@ -1527,7 +1527,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask)
diff --git a/mm/migrate.c b/mm/migrate.c
index a16b15090df3..d4fd680be3b0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
/* Simple case, sync compaction */
if (mode != MIGRATE_ASYNC) {
do {
- get_bh(bh);
lock_buffer(bh);
bh = bh->b_this_page;
@@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
/* async case, we cannot block on lock_buffer so use trylock_buffer */
do {
- get_bh(bh);
if (!trylock_buffer(bh)) {
/*
* We failed to lock the buffer and cannot stall in
* async migration. Release the taken locks
*/
struct buffer_head *failed_bh = bh;
- put_bh(failed_bh);
bh = head;
while (bh != failed_bh) {
unlock_buffer(bh);
- put_bh(bh);
bh = bh->b_this_page;
}
return false;
@@ -818,7 +814,6 @@ unlock_buffers:
bh = head;
do {
unlock_buffer(bh);
- put_bh(bh);
bh = bh->b_this_page;
} while (bh != head);
@@ -1135,10 +1130,13 @@ out:
* If migration is successful, decrease refcount of the newpage
* which will not free the page because new page owner increased
* refcounter. As well, if it is LRU page, add the page to LRU
- * list in here.
+ * list in here. Use the old state of the isolated source page to
+ * determine if we migrated a LRU page. newpage was already unlocked
+ * and possibly modified by its owner - don't rely on the page
+ * state.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
- if (unlikely(__PageMovable(newpage)))
+ if (unlikely(!is_lru))
put_page(newpage);
else
putback_lru_page(newpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f0e8cd9edb1a..26ea8636758f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
static void wake_oom_reaper(struct task_struct *tsk)
{
- /* tsk is already queued? */
- if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+ /* mm is already queued? */
+ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
return;
get_task_struct(tsk);
@@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
* still freeing memory.
*/
read_lock(&tasklist_lock);
+
+ /*
+ * The task 'p' might have already exited before reaching here. The
+ * put_task_struct() will free task_struct 'p' while the loop still try
+ * to access the field of 'p', so, get an extra reference.
+ */
+ get_task_struct(p);
for_each_thread(p, t) {
list_for_each_entry(child, &t->children, sibling) {
unsigned int child_points;
@@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
}
}
}
+ put_task_struct(p);
read_unlock(&tasklist_lock);
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d295c9bc01a8..0b9f577b1a2a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2170,6 +2170,18 @@ static inline void boost_watermark(struct zone *zone)
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
watermark_boost_factor, 10000);
+
+ /*
+ * high watermark may be uninitialised if fragmentation occurs
+ * very early in boot so do not boost. We do not fall
+ * through and boost by pageblock_nr_pages as failing
+ * allocations that early means that reclaim is not going
+ * to help and it may even be impossible to reclaim the
+ * boosted watermark resulting in a hang.
+ */
+ if (!max_boost)
+ return;
+
max_boost = max(pageblock_nr_pages, max_boost);
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
@@ -4675,11 +4687,11 @@ refill:
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
- page_ref_add(page, size - 1);
+ page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = size;
}
@@ -4695,10 +4707,10 @@ refill:
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
- set_page_count(page, size);
+ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
}
@@ -5701,18 +5713,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
cond_resched();
}
}
-#ifdef CONFIG_SPARSEMEM
- /*
- * If the zone does not span the rest of the section then
- * we should at least initialize those pages. Otherwise we
- * could blow up on a poisoned page in some paths which depend
- * on full sections being initialized (e.g. memory hotplug).
- */
- while (end_pfn % PAGES_PER_SECTION) {
- __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid);
- end_pfn++;
- }
-#endif
}
#ifdef CONFIG_ZONE_DEVICE
diff --git a/mm/page_ext.c b/mm/page_ext.c
index ae44f7adbe07..8c78b8d45117 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -398,10 +398,8 @@ void __init page_ext_init(void)
* We know some arch can have a nodes layout such as
* -------------pfn-------------->
* N0 | N1 | N2 | N0 | N1 | N2|....
- *
- * Take into account DEFERRED_STRUCT_PAGE_INIT.
*/
- if (early_pfn_to_nid(pfn) != nid)
+ if (pfn_to_nid(pfn) != nid)
continue;
if (init_section_page_ext(pfn, nid))
goto oom;
diff --git a/mm/shmem.c b/mm/shmem.c
index 6ece1e2fe76e..0905215fb016 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2854,10 +2854,14 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
* No ordinary (disk based) filesystem counts links as inodes;
* but each new link needs a new dentry, pinning lowmem, and
* tmpfs dentries cannot be pruned until they are unlinked.
+ * But if an O_TMPFILE file is linked into the tmpfs, the
+ * first link must skip that, to get the accounting right.
*/
- ret = shmem_reserve_inode(inode->i_sb);
- if (ret)
- goto out;
+ if (inode->i_nlink) {
+ ret = shmem_reserve_inode(inode->i_sb);
+ if (ret)
+ goto out;
+ }
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
diff --git a/mm/slab.c b/mm/slab.c
index 78eb8c5bf4e4..91c1863df93d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2359,7 +2359,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
void *freelist;
void *addr = page_address(page);
- page->s_mem = kasan_reset_tag(addr) + colour_off;
+ page->s_mem = addr + colour_off;
page->active = 0;
if (OBJFREELIST_SLAB(cachep))
@@ -2368,6 +2368,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
/* Slab management obj is off-slab. */
freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
+ freelist = kasan_reset_tag(freelist);
if (!freelist)
return NULL;
} else {
@@ -2681,6 +2682,13 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
offset *= cachep->colour_off;
+ /*
+ * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
+ * page_address() in the latter returns a non-tagged pointer,
+ * as it should be for slab pages.
+ */
+ kasan_poison_slab(page);
+
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, page_node);
@@ -2689,7 +2697,6 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
slab_map_pages(cachep, page, freelist);
- kasan_poison_slab(page);
cache_init_objs(cachep, page);
if (gfpflags_allow_blocking(local_flags))
@@ -3540,7 +3547,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
- ret = kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3630,7 +3636,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
- ret = kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -4408,6 +4413,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
unsigned int objnr;
unsigned long offset;
+ ptr = kasan_reset_tag(ptr);
+
/* Find and validate object. */
cachep = page->slab_cache;
objnr = obj_to_index(cachep, page, (void *)ptr);
diff --git a/mm/slab.h b/mm/slab.h
index 4190c24ef0e9..384105318779 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -437,11 +437,10 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
flags &= gfp_allowed_mask;
for (i = 0; i < size; i++) {
- void *object = p[i];
-
- kmemleak_alloc_recursive(object, s->object_size, 1,
+ p[i] = kasan_slab_alloc(s, p[i], flags);
+ /* As p[i] might get tagged, call kmemleak hook after KASAN. */
+ kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
- p[i] = kasan_slab_alloc(s, object, flags);
}
if (memcg_kmem_enabled())
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 81732d05e74a..f9d89c1b5977 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1228,8 +1228,9 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
flags |= __GFP_COMP;
page = alloc_pages(flags, order);
ret = page ? page_address(page) : NULL;
- kmemleak_alloc(ret, size, 1, flags);
ret = kasan_kmalloc_large(ret, size, flags);
+ /* As ret might get tagged, call kmemleak hook after KASAN. */
+ kmemleak_alloc(ret, size, 1, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
diff --git a/mm/slub.c b/mm/slub.c
index 1e3d0ec4e200..dc777761b6b7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -249,7 +249,18 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
- return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr);
+ /*
+ * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
+ * Normally, this doesn't cause any issues, as both set_freepointer()
+ * and get_freepointer() are called with a pointer with the same tag.
+ * However, there are some issues with CONFIG_SLUB_DEBUG code. For
+ * example, when __free_slub() iterates over objects in a cache, it
+ * passes untagged pointers to check_object(). check_object() in turns
+ * calls get_freepointer() with an untagged pointer, which causes the
+ * freepointer to be restored incorrectly.
+ */
+ return (void *)((unsigned long)ptr ^ s->random ^
+ (unsigned long)kasan_reset_tag((void *)ptr_addr));
#else
return ptr;
#endif
@@ -303,15 +314,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
__p < (__addr) + (__objects) * (__s)->size; \
__p += (__s)->size)
-#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
- for (__p = fixup_red_left(__s, __addr), __idx = 1; \
- __idx <= __objects; \
- __p += (__s)->size, __idx++)
-
/* Determine object index from a given position */
static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
{
- return (p - addr) / s->size;
+ return (kasan_reset_tag(p) - addr) / s->size;
}
static inline unsigned int order_objects(unsigned int order, unsigned int size)
@@ -507,6 +513,7 @@ static inline int check_valid_pointer(struct kmem_cache *s,
return 1;
base = page_address(page);
+ object = kasan_reset_tag(object);
object = restore_red_left(s, object);
if (object < base || object >= base + page->objects * s->size ||
(object - base) % s->size) {
@@ -1075,6 +1082,16 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
init_tracking(s, object);
}
+static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+{
+ if (!(s->flags & SLAB_POISON))
+ return;
+
+ metadata_access_enable();
+ memset(addr, POISON_INUSE, PAGE_SIZE << order);
+ metadata_access_disable();
+}
+
static inline int alloc_consistency_checks(struct kmem_cache *s,
struct page *page,
void *object, unsigned long addr)
@@ -1330,6 +1347,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
+static inline void setup_page_debug(struct kmem_cache *s,
+ void *addr, int order) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1374,8 +1393,10 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
*/
static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
+ ptr = kasan_kmalloc_large(ptr, size, flags);
+ /* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
- return kasan_kmalloc_large(ptr, size, flags);
+ return ptr;
}
static __always_inline void kfree_hook(void *x)
@@ -1641,27 +1662,25 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if (page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
+ kasan_poison_slab(page);
+
start = page_address(page);
- if (unlikely(s->flags & SLAB_POISON))
- memset(start, POISON_INUSE, PAGE_SIZE << order);
-
- kasan_poison_slab(page);
+ setup_page_debug(s, start, order);
shuffle = shuffle_freelist(s, page);
if (!shuffle) {
- for_each_object_idx(p, idx, s, start, page->objects) {
- if (likely(idx < page->objects)) {
- next = p + s->size;
- next = setup_object(s, page, next);
- set_freepointer(s, p, next);
- } else
- set_freepointer(s, p, NULL);
- }
start = fixup_red_left(s, start);
start = setup_object(s, page, start);
page->freelist = start;
+ for (idx = 0, p = start; idx < page->objects - 1; idx++) {
+ next = p + s->size;
+ next = setup_object(s, page, next);
+ set_freepointer(s, p, next);
+ p = next;
+ }
+ set_freepointer(s, p, NULL);
}
page->inuse = page->objects;
diff --git a/mm/swap.c b/mm/swap.c
index 4929bc1be60e..4d7d37eb3c40 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -320,11 +320,6 @@ static inline void activate_page_drain(int cpu)
{
}
-static bool need_activate_page_drain(int cpu)
-{
- return false;
-}
-
void activate_page(struct page *page)
{
struct zone *zone = page_zone(page);
@@ -653,13 +648,15 @@ void lru_add_drain(void)
put_cpu();
}
+#ifdef CONFIG_SMP
+
+static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
lru_add_drain();
}
-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
-
/*
* Doesn't need any cpu hotplug locking because we do rely on per-cpu
* kworkers being shut down before our page_alloc_cpu_dead callback is
@@ -702,6 +699,12 @@ void lru_add_drain_all(void)
mutex_unlock(&lock);
}
+#else
+void lru_add_drain_all(void)
+{
+ lru_add_drain();
+}
+#endif
/**
* release_pages - batched put_page()
diff --git a/mm/util.c b/mm/util.c
index 1ea055138043..379319b1bcfd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -150,7 +150,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER);
+ p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a714c4f800e9..e979705bbf32 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
delta = freeable / 2;
}
- /*
- * Make sure we apply some minimal pressure on default priority
- * even on small cgroups. Stale objects are not only consuming memory
- * by themselves, but can also hold a reference to a dying cgroup,
- * preventing it from being reclaimed. A dying cgroup with all
- * corresponding structures like per-cpu stats and kmem caches
- * can be really big, so it may lead to a significant waste of memory.
- */
- delta = max_t(unsigned long long, delta, min(freeable, batch_size));
-
total_scan += delta;
if (total_scan < 0) {
pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",