From 7511c3ede752e6dd67df20779b4e11effe102637 Mon Sep 17 00:00:00 2001
From: Jerome Marchand
Date: Fri, 20 Nov 2015 15:57:02 -0800
Subject: mm: vmalloc: don't remove inexistent guard hole in remove_vm_area()

Commit 71394fe50146 ("mm: vmalloc: add flag preventing guard hole
allocation") missed a spot.  Currently remove_vm_area() decreases vm->size
to "remove" the guard hole page, even when it isn't present.  All but one
of its users just free the vm_struct right away and never access vm->size
anyway.

Don't touch the size in remove_vm_area() and have __vunmap() use the
proper get_vm_area_size() helper.

Signed-off-by: Jerome Marchand
Acked-by: Andrey Ryabinin
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d04563480c94..8e3c9c5a3042 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1443,7 +1443,6 @@ struct vm_struct *remove_vm_area(const void *addr)
 		vmap_debug_free_range(va->va_start, va->va_end);
 		kasan_free_shadow(vm);
 		free_unmap_vmap_area(va);
-		vm->size -= PAGE_SIZE;
 
 		return vm;
 	}
@@ -1468,8 +1467,8 @@ static void __vunmap(const void *addr, int deallocate_pages)
 		return;
 	}
 
-	debug_check_no_locks_freed(addr, area->size);
-	debug_check_no_obj_freed(addr, area->size);
+	debug_check_no_locks_freed(addr, get_vm_area_size(area));
+	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
 	if (deallocate_pages) {
 		int i;
--
cgit v1.2.3

From 1a763615688b891246c5b0a932d7a95fea4c1a68 Mon Sep 17 00:00:00 2001
From: Jason J. Herne
Date: Fri, 20 Nov 2015 15:57:04 -0800
Subject: mm: loosen MADV_NOHUGEPAGE to enable Qemu postcopy on s390

MADV_NOHUGEPAGE processing is too restrictive.  kvm already disables
hugepages, but hugepage_madvise() takes the error path when we ask to turn
on the MADV_NOHUGEPAGE bit and the bit is already on.  This causes Qemu's
new postcopy migration feature to fail on s390 because its first action is
to madvise the guest address space as NOHUGEPAGE.  This patch modifies the
code so that the operation now succeeds without error.

For consistency, do the same for MADV_HUGEPAGE.

Signed-off-by: Jason J. Herne
Reviewed-by: Andrea Arcangeli
Acked-by: Christian Borntraeger
Acked-by: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/huge_memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c29ddebc8705..62fe06bb7d04 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2009,7 +2009,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+		if (*vm_flags & VM_NO_THP)
 			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
@@ -2025,7 +2025,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+		if (*vm_flags & VM_NO_THP)
 			return -EINVAL;
 		*vm_flags &= ~VM_HUGEPAGE;
 		*vm_flags |= VM_NOHUGEPAGE;
--
cgit v1.2.3
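For illustration only (not part of the commit above): a minimal userspace sketch of the madvise() pattern that postcopy relies on. The mapping size and messages are invented for the example. The second MADV_NOHUGEPAGE call is the case that used to fail with EINVAL when the bit was already set (as it is on s390 guests, where KVM sets VM_NOHUGEPAGE itself); with this patch it becomes a successful no-op. A kernel built with CONFIG_TRANSPARENT_HUGEPAGE is assumed.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 2UL * 1024 * 1024;
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        /* First call sets VM_NOHUGEPAGE on the VMA. */
        if (madvise(p, len, MADV_NOHUGEPAGE))
                perror("first MADV_NOHUGEPAGE");

        /*
         * Second call finds the bit already set.  Before this patch it
         * returned EINVAL; afterwards it succeeds without doing anything.
         */
        if (madvise(p, len, MADV_NOHUGEPAGE))
                perror("second MADV_NOHUGEPAGE");
        else
                printf("repeated MADV_NOHUGEPAGE succeeded\n");

        munmap(p, len);
        return EXIT_SUCCESS;
}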
From 50e55bf626ad3ebbca45c0c0d03eb1710a139638 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Fri, 20 Nov 2015 15:57:10 -0800
Subject: mm/page-writeback.c: initialize m_dirty to avoid compile warning

When building the kernel with gcc 5.2, the following warning is raised:

  mm/page-writeback.c: In function 'balance_dirty_pages.isra.10':
  mm/page-writeback.c:1545:17: warning: 'm_dirty' may be used uninitialized in this function [-Wmaybe-uninitialized]
     unsigned long m_dirty, m_thresh, m_bg_thresh;

m_dirty, m_thresh and m_bg_thresh are initialized inside the "if (mdtc)"
block, so if mdtc is NULL they won't be initialized before being used.
Initialize m_dirty to zero, and also initialize m_thresh and m_bg_thresh
for consistency.

They are used later in the condition:

  !mdtc || m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh)

If mdtc is NULL, dirty_freerun_ceiling() is never called at all, so the
initialization changes no behavior beyond silencing the compile warning.

(akpm: the patch actually reduces .text size by ~20 bytes on gcc-4.x.y)

[akpm@linux-foundation.org: add comment]
Signed-off-by: Yang Shi
Cc: Tejun Heo
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page-writeback.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c90357c34ea..3e4d65445fa7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1542,7 +1542,9 @@ static void balance_dirty_pages(struct address_space *mapping,
 	for (;;) {
 		unsigned long now = jiffies;
 		unsigned long dirty, thresh, bg_thresh;
-		unsigned long m_dirty, m_thresh, m_bg_thresh;
+		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
+		unsigned long m_thresh = 0;
+		unsigned long m_bg_thresh = 0;
 
 		/*
 		 * Unstable writes are a feature of certain networked
--
cgit v1.2.3
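A standalone sketch (not from the kernel tree; the names are invented) of the pattern that triggers -Wmaybe-uninitialized here: a variable assigned only under a condition and then read only behind the same condition. Whether gcc actually warns depends on version, optimization level and inlining, which is why the fix simply zero-initializes the variables.

/* Build with: gcc -O2 -Wall -Wmaybe-uninitialized example.c */
#include <stdio.h>

static unsigned long pick_threshold(int have_memcg, unsigned long memcg_dirty)
{
        unsigned long dirty = 100;
        unsigned long m_dirty;          /* gcc 5.x may warn about this */

        if (have_memcg)
                m_dirty = memcg_dirty;

        /*
         * Safe: m_dirty is only read when have_memcg is non-zero, but the
         * compiler cannot always prove that.  Initializing m_dirty to 0 at
         * the declaration silences the warning without changing behavior.
         */
        if (!have_memcg || m_dirty <= dirty)
                return dirty;

        return m_dirty;
}

int main(void)
{
        printf("%lu %lu\n", pick_threshold(0, 0), pick_threshold(1, 500));
        return 0;
}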
From 459372545c9c0d6f491e280dccc8a54a61b60e56 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Fri, 20 Nov 2015 15:57:18 -0800
Subject: kasan: fix kmemleak false-positive in kasan_module_alloc()

Kmemleak reports the following leak:

  unreferenced object 0xfffffbfff41ea000 (size 20480):
    comm "modprobe", pid 65199, jiffies 4298875551 (age 542.568s)
    hex dump (first 32 bytes):
      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    backtrace:
      [] kmemleak_alloc+0x4e/0xc0
      [] __vmalloc_node_range+0x4b8/0x740
      [] kasan_module_alloc+0x72/0xc0
      [] module_alloc+0x78/0xb0
      [] module_alloc_update_bounds+0x14/0x70
      [] layout_and_allocate+0x16f4/0x3c90
      [] load_module+0x2ff/0x6690
      [] SyS_finit_module+0x136/0x170
      [] system_call_fastpath+0x16/0x1b
      [] 0xffffffffffffffff

kasan_module_alloc() allocates shadow memory for a module and frees it on
module unloading.  It doesn't store the pointer to the allocated shadow
memory because it can be recalculated from the shadowed address, i.e.
kasan_mem_to_shadow(addr).

Since kmemleak cannot find a pointer to the allocated shadow, it thinks
that the memory has leaked.

Use kmemleak_ignore() to tell kmemleak that this is not a leak and the
shadow memory doesn't contain any pointers.

Signed-off-by: Andrey Ryabinin
Acked-by: Catalin Marinas
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/kasan/kasan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm')

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index d41b21bce6a0..bc0a8d8b8f42 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <linux/kmemleak.h>
 #include
 #include
 #include
@@ -444,6 +445,7 @@ int kasan_module_alloc(void *addr, size_t size)
 
 	if (ret) {
 		find_vm_area(addr)->flags |= VM_KASAN;
+		kmemleak_ignore(ret);
 		return 0;
 	}
 
--
cgit v1.2.3

From a380a3c75529a5c42b78c0d64a46404f8cb0c0d1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 20 Nov 2015 15:57:35 -0800
Subject: slub: create new ___slab_alloc function that can be called with irqs disabled

Bulk allocation needs a function like this because it currently has to
enable interrupts before calling __slab_alloc(), which promptly disables
them again using the expensive local_irq_save().

Signed-off-by: Christoph Lameter
Cc: Jesper Dangaard Brouer
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Alexander Duyck
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 7cb4bf9ae320..2a952751bb50 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2295,23 +2295,15 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
  * And if we were unable to get a new slab from the partial slab lists then
  * we need to allocate a new slab. This is the slowest path since it involves
  * a call to the page allocator and the setup of a new slab.
+ *
+ * Version of __slab_alloc to use when we know that interrupts are
+ * already disabled (which is the case for bulk allocation).
  */
-static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			  unsigned long addr, struct kmem_cache_cpu *c)
 {
 	void *freelist;
 	struct page *page;
-	unsigned long flags;
-
-	local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
-	/*
-	 * We may have been preempted and rescheduled on a different
-	 * cpu before disabling interrupts. Need to reload cpu area
-	 * pointer.
-	 */
-	c = this_cpu_ptr(s->cpu_slab);
-#endif
 
 	page = c->page;
 	if (!page)
@@ -2369,7 +2361,6 @@ load_freelist:
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
-	local_irq_restore(flags);
 	return freelist;
 
 new_slab:
@@ -2386,7 +2377,6 @@ new_slab:
 
 	if (unlikely(!freelist)) {
 		slab_out_of_memory(s, gfpflags, node);
-		local_irq_restore(flags);
 		return NULL;
 	}
 
@@ -2402,10 +2392,34 @@ new_slab:
 	deactivate_slab(s, page, get_freepointer(s, freelist));
 	c->page = NULL;
 	c->freelist = NULL;
-	local_irq_restore(flags);
 	return freelist;
 }
 
+/*
+ * Another one that disabled interrupt and compensates for possible
+ * cpu changes by refetching the per cpu area pointer.
+ */
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+			  unsigned long addr, struct kmem_cache_cpu *c)
+{
+	void *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	/*
+	 * We may have been preempted and rescheduled on a different
+	 * cpu before disabling interrupts. Need to reload cpu area
+	 * pointer.
+	 */
+	c = this_cpu_ptr(s->cpu_slab);
+#endif
+
+	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	local_irq_restore(flags);
+	return p;
+}
+
 /*
  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
  * have the fastpath folded into their functions. So no function call
--
cgit v1.2.3
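A userspace analogy of the refactoring above, invented for illustration and not kernel code: the core allocator assumes its caller already holds the required context (a pthread mutex standing in for local_irq_save()), a thin wrapper acquires that context for ordinary callers, and a bulk caller enters it once instead of once per object.

/* Build with: cc -pthread analogy.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int pool[16];
static int top;

/* Core allocator: the caller must already hold the lock. */
static int __alloc_locked(void)
{
        if (top == 0)
                return -1;
        return pool[--top];
}

/* Convenience wrapper for callers that do not hold the lock. */
static int alloc_one(void)
{
        int v;

        pthread_mutex_lock(&lock);
        v = __alloc_locked();
        pthread_mutex_unlock(&lock);
        return v;
}

/* Bulk caller: takes the lock once instead of once per object. */
static int alloc_bulk(int *out, int n)
{
        int i;

        pthread_mutex_lock(&lock);
        for (i = 0; i < n; i++) {
                out[i] = __alloc_locked();
                if (out[i] < 0)
                        break;
        }
        pthread_mutex_unlock(&lock);
        return i;
}

int main(void)
{
        int batch[4];
        int one, got;

        for (top = 0; top < 16; top++)  /* fill the free pool with 0..15 */
                pool[top] = top;

        one = alloc_one();
        got = alloc_bulk(batch, 4);
        printf("single object: %d, bulk batch of %d objects\n", one, got);
        return 0;
}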
From 87098373e244840e00bd1c93884c1d917411597e Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 20 Nov 2015 15:57:38 -0800
Subject: slub: avoid irqoff/on in bulk allocation

Use the new function that can do allocation while interrupts are
disabled, which avoids the irq on/off sequences.

Signed-off-by: Christoph Lameter
Cc: Jesper Dangaard Brouer
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Alexander Duyck
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 2a952751bb50..23f9d8d26422 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2818,30 +2818,23 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		void *object = c->freelist;
 
 		if (unlikely(!object)) {
-			local_irq_enable();
 			/*
 			 * Invoking slow path likely have side-effect
 			 * of re-populating per CPU c->freelist
 			 */
-			p[i] = __slab_alloc(s, flags, NUMA_NO_NODE,
+			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
 					    _RET_IP_, c);
-			if (unlikely(!p[i])) {
-				__kmem_cache_free_bulk(s, i, p);
-				return false;
-			}
-			local_irq_disable();
+			if (unlikely(!p[i]))
+				goto error;
+
 			c = this_cpu_ptr(s->cpu_slab);
 			continue; /* goto for-loop */
 		}
 
 		/* kmem_cache debug support */
 		s = slab_pre_alloc_hook(s, flags);
-		if (unlikely(!s)) {
-			__kmem_cache_free_bulk(s, i, p);
-			c->tid = next_tid(c->tid);
-			local_irq_enable();
-			return false;
-		}
+		if (unlikely(!s))
+			goto error;
 
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
@@ -2861,6 +2854,11 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	}
 
 	return true;
+
+error:
+	__kmem_cache_free_bulk(s, i, p);
+	local_irq_enable();
+	return false;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
--
cgit v1.2.3

From b4a64718797b84b64a6ddf3d4183c29c2e79ef1d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Fri, 20 Nov 2015 15:57:41 -0800
Subject: slub: mark the dangling ifdef #else of CONFIG_SLUB_DEBUG

The #ifdef of CONFIG_SLUB_DEBUG is located very far from its associated
#else.  For readability, mark the #else with a comment.

Signed-off-by: Jesper Dangaard Brouer
Acked-by: Christoph Lameter
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Alexander Duyck
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index 23f9d8d26422..a0c1365f6426 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1204,7 +1204,7 @@ unsigned long kmem_cache_flags(unsigned long object_size,
 
 	return flags;
 }
-#else
+#else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
 			struct page *page, void *object) {}
 
--
cgit v1.2.3
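To show how the reworked bulk interface is meant to be consumed, here is a hypothetical kernel-side sketch; fill_batch(), drain_batch() and NR_OBJS are invented names, not part of these patches. With this series, a failed bulk allocation already returns the partially allocated objects to the cache, so the caller only has to check the boolean result. Note that at this point in history kmem_cache_alloc_bulk() still returns bool rather than the count of allocated objects.

#include <linux/errno.h>
#include <linux/slab.h>

#define NR_OBJS 16

/* Allocate a whole batch of objects from @cache in one call. */
static int fill_batch(struct kmem_cache *cache, void **objs)
{
        /*
         * Either all NR_OBJS slots are filled and true is returned, or the
         * allocator frees whatever it managed to grab and returns false.
         */
        if (!kmem_cache_alloc_bulk(cache, GFP_KERNEL, NR_OBJS, objs))
                return -ENOMEM;

        return 0;
}

/* Give the whole batch back in one call as well. */
static void drain_batch(struct kmem_cache *cache, void **objs)
{
        kmem_cache_free_bulk(cache, NR_OBJS, objs);
}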