Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 107
1 file changed, 75 insertions, 32 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e8a807c78110..d2a00ad4e1dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1195,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size,
 {
 	struct vmap_area *va;
 	struct rb_node *node;
-	unsigned long length;
 
 	/* Start from the root. */
 	node = free_vmap_area_root.rb_node;
 
-	/* Adjust the search size for alignment overhead. */
-	length = size + align - 1;
-
 	while (node) {
 		va = rb_entry(node, struct vmap_area, rb_node);
 
-		if (get_subtree_max_size(node->rb_left) >= length &&
+		if (get_subtree_max_size(node->rb_left) >= size &&
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
@@ -1216,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size,
 			/*
 			 * Does not make sense to go deeper towards the right
 			 * sub-tree if it does not have a free block that is
-			 * equal or bigger to the requested search length.
+			 * equal or bigger to the requested search size.
 			 */
-			if (get_subtree_max_size(node->rb_right) >= length) {
+			if (get_subtree_max_size(node->rb_right) >= size) {
 				node = node->rb_right;
 				continue;
 			}
@@ -1226,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size,
 			/*
 			 * OK. We roll back and find the first right sub-tree,
 			 * that will satisfy the search criteria. It can happen
-			 * only once due to "vstart" restriction.
+			 * due to "vstart" restriction or an alignment overhead
+			 * that is bigger then PAGE_SIZE.
 			 */
 			while ((node = rb_parent(node))) {
 				va = rb_entry(node, struct vmap_area, rb_node);
 				if (is_within_this_va(va, size, align, vstart))
 					return va;
 
-				if (get_subtree_max_size(node->rb_right) >= length &&
+				if (get_subtree_max_size(node->rb_right) >= size &&
 						vstart <= va->va_start) {
+					/*
+					 * Shift the vstart forward. Please note, we update it with
+					 * parent's start address adding "1" because we do not want
+					 * to enter same sub-tree after it has already been checked
+					 * and no suitable free block found there.
+					 */
+					vstart = va->va_start + 1;
 					node = node->rb_right;
 					break;
 				}
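The hunks above stop inflating the search key by "align - 1" and instead rely on the per-node fit test (is_within_this_va() in the kernel) together with the new vstart bump when the walk rolls back to a parent. The stand-alone sketch below is not kernel code: it illustrates only the fit test, the helper name fits_request and its arguments are invented, and a power-of-two align is assumed. It shows why a pre-filter on "size + align - 1" can reject a free block whose start already satisfies the alignment.

#include <stdbool.h>
#include <stdio.h>

/* Can the free block [start, end) serve "size" bytes aligned to "align",
 * starting no lower than "vstart"?  Mirrors the check done per node. */
static bool fits_request(unsigned long start, unsigned long end,
			 unsigned long size, unsigned long align,
			 unsigned long vstart)
{
	unsigned long base = start > vstart ? start : vstart;

	/* Round the candidate address up to the requested alignment. */
	base = (base + align - 1) & ~(align - 1);

	/* Guard against overflow, then check that "size" bytes still fit. */
	if (base + size < base)
		return false;

	return base + size <= end;
}

int main(void)
{
	/*
	 * A 64KiB block that is already 64KiB-aligned: filtering on the plain
	 * "size" accepts it, while the old "size + align - 1" estimate
	 * (128KiB - 1 here) would have skipped it even though it fits.
	 */
	printf("%d\n", fits_request(0x10000, 0x20000, 0x10000, 0x10000, 0));
	return 0;
}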
@@ -1265,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size,
 }
 
 static void
-find_vmap_lowest_match_check(unsigned long size)
+find_vmap_lowest_match_check(unsigned long size, unsigned long align)
 {
 	struct vmap_area *va_1, *va_2;
 	unsigned long vstart;
@@ -1274,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size)
 	get_random_bytes(&rnd, sizeof(rnd));
 	vstart = VMALLOC_START + rnd;
 
-	va_1 = find_vmap_lowest_match(size, 1, vstart);
-	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
+	va_1 = find_vmap_lowest_match(size, align, vstart);
+	va_2 = find_vmap_lowest_linear_match(size, align, vstart);
 
 	if (va_1 != va_2)
 		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -1454,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
 		return vend;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
-	find_vmap_lowest_match_check(size);
+	find_vmap_lowest_match_check(size, align);
 #endif
 
 	return nva_start_addr;
@@ -2272,15 +2276,22 @@ void __init vm_area_add_early(struct vm_struct *vm)
  */
 void __init vm_area_register_early(struct vm_struct *vm, size_t align)
 {
-	static size_t vm_init_off __initdata;
-	unsigned long addr;
+	unsigned long addr = ALIGN(VMALLOC_START, align);
+	struct vm_struct *cur, **p;
 
-	addr = ALIGN(VMALLOC_START + vm_init_off, align);
-	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
+	BUG_ON(vmap_initialized);
 
-	vm->addr = (void *)addr;
+	for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
+		if ((unsigned long)cur->addr - addr >= vm->size)
+			break;
+		addr = ALIGN((unsigned long)cur->addr + cur->size, align);
+	}
 
-	vm_area_add_early(vm);
+	BUG_ON(addr > VMALLOC_END - vm->size);
+	vm->addr = (void *)addr;
+	vm->next = *p;
+	*p = vm;
+	kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
 }
 
 static void vmap_init_free_space(void)
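The vm_area_register_early() hunk above replaces the running offset with a walk over the address-sorted vmlist, placing each early area in the lowest aligned gap that can hold it. The sketch below is a stand-alone userspace illustration of that placement loop, not kernel code: the type early_vm, the EARLY_START/EARLY_END window and the helper names are invented, and align is assumed to be a power of two, as it is for the kernel callers.

#include <stdio.h>

struct early_vm {
	unsigned long addr;
	unsigned long size;
	struct early_vm *next;
};

#define EARLY_START	0x100000UL
#define EARLY_END	0x200000UL

#define EARLY_ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Place "vm" at the lowest aligned gap in the sorted list; 0 on success. */
static int early_register(struct early_vm **head, struct early_vm *vm,
			  unsigned long align)
{
	unsigned long addr = EARLY_ALIGN(EARLY_START, align);
	struct early_vm *cur, **p;

	for (p = head; (cur = *p) != NULL; p = &cur->next) {
		/* Enough room between "addr" and the next placed range? */
		if (cur->addr - addr >= vm->size)
			break;
		addr = EARLY_ALIGN(cur->addr + cur->size, align);
	}

	if (addr > EARLY_END - vm->size)
		return -1;

	vm->addr = addr;
	vm->next = *p;	/* keep the list sorted by address */
	*p = vm;
	return 0;
}

int main(void)
{
	struct early_vm a = { .size = 0x4000 }, b = { .size = 0x8000 };
	struct early_vm *head = NULL;

	early_register(&head, &a, 0x10000);
	early_register(&head, &b, 0x10000);
	printf("a at 0x%lx, b at 0x%lx\n", a.addr, b.addr);
	return 0;
}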
@@ -2743,6 +2754,13 @@ void *vmap(struct page **pages, unsigned int count,
 
 	might_sleep();
 
+	/*
+	 * Your top guard is someone else's bottom guard. Not having a top
+	 * guard compromises someone else's mappings too.
+	 */
+	if (WARN_ON_ONCE(flags & VM_NO_GUARD))
+		flags &= ~VM_NO_GUARD;
+
 	if (count > totalram_pages())
 		return NULL;
 
@@ -2825,7 +2843,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	 * to fails, fallback to a single page allocator that is
 	 * more permissive.
 	 */
-	if (!order && nid != NUMA_NO_NODE) {
+	if (!order) {
 		while (nr_allocated < nr_pages) {
 			unsigned int nr, nr_pages_request;
 
@@ -2837,8 +2855,20 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			 */
 			nr_pages_request = min(100U, nr_pages - nr_allocated);
 
-			nr = alloc_pages_bulk_array_node(gfp, nid,
-				nr_pages_request, pages + nr_allocated);
+			/* memory allocation should consider mempolicy, we can't
+			 * wrongly use nearest node when nid == NUMA_NO_NODE,
+			 * otherwise memory may be allocated in only one node,
+			 * but mempolcy want to alloc memory by interleaving.
+			 */
+			if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
+				nr = alloc_pages_bulk_array_mempolicy(gfp,
+							nr_pages_request,
+							pages + nr_allocated);
+
+			else
+				nr = alloc_pages_bulk_array_node(gfp, nid,
+							nr_pages_request,
+							pages + nr_allocated);
 
 			nr_allocated += nr;
 			cond_resched();
@@ -2850,7 +2880,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			if (nr != nr_pages_request)
 				break;
 		}
-	} else if (order)
+	} else
 		/*
 		 * Compound pages required for remap_vmalloc_page if
 		 * high-order pages.
@@ -2860,6 +2890,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 
 	/* High-order pages or fallback path if "bulk" fails. */
 	while (nr_allocated < nr_pages) {
+		if (fatal_signal_pending(current))
+			break;
+
 		if (nid == NUMA_NO_NODE)
 			page = alloc_pages(gfp, order);
 		else
@@ -2887,6 +2920,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 int node)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+	const gfp_t orig_gfp_mask = gfp_mask;
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long size = get_vm_area_size(area);
 	unsigned long array_size;
@@ -2907,7 +2941,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	}
 
 	if (!area->pages) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to allocated page array size %lu",
 			nr_small_pages * PAGE_SIZE, array_size);
 		free_vm_area(area);
@@ -2927,7 +2961,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	 * allocation request, free them via __vfree() if any.
 	 */
 	if (area->nr_pages != nr_small_pages) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, page order %u, failed to allocate pages",
 			area->nr_pages * PAGE_SIZE, page_order);
 		goto fail;
@@ -2935,7 +2969,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	if (vmap_pages_range(addr, addr + size, prot, area->pages,
 			page_shift) < 0) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to map pages",
 			area->nr_pages * PAGE_SIZE);
 		goto fail;
@@ -2961,8 +2995,16 @@ fail:
  * @caller: caller's return address
  *
  * Allocate enough pages to cover @size from the page level
- * allocator with @gfp_mask flags. Map them into contiguous
- * kernel virtual space, using a pagetable protection of @prot.
+ * allocator with @gfp_mask flags. Please note that the full set of gfp
+ * flags are not supported. GFP_KERNEL would be a preferred allocation mode
+ * but GFP_NOFS and GFP_NOIO are supported as well. Zone modifiers are not
+ * supported. From the reclaim modifiers __GFP_DIRECT_RECLAIM is required (aka
+ * GFP_NOWAIT is not supported) and only __GFP_NOFAIL is supported (aka
+ * __GFP_NORETRY and __GFP_RETRY_MAYFAIL are not supported).
+ * __GFP_NOWARN can be used to suppress error messages about failures.
+ *
+ * Map them into contiguous kernel virtual space, using a pagetable
+ * protection of @prot.
  *
  * Return: the address of the area or %NULL on failure
  */
@@ -3856,6 +3898,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 {
 	if (IS_ENABLED(CONFIG_NUMA)) {
 		unsigned int nr, *counters = m->private;
+		unsigned int step = 1U << vm_area_page_order(v);
 
 		if (!counters)
 			return;
@@ -3867,9 +3910,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 
-		for (nr = 0; nr < v->nr_pages; nr++)
-			counters[page_to_nid(v->pages[nr])]++;
-
+		for (nr = 0; nr < v->nr_pages; nr += step)
+			counters[page_to_nid(v->pages[nr])] += step;
 		for_each_node_state(nr, N_HIGH_MEMORY)
 			if (counters[nr])
 				seq_printf(m, " N%u=%u", nr, counters[nr]);
@@ -3905,7 +3947,7 @@ static int s_show(struct seq_file *m, void *p)
 			(void *)va->va_start, (void *)va->va_end,
 			va->va_end - va->va_start);
 
-		return 0;
+		goto final;
 	}
 
 	v = va->vm;
@@ -3946,6 +3988,7 @@ static int s_show(struct seq_file *m, void *p)
 	/*
 	 * As a final step, dump "unpurged" areas.
	 */
+final:
 	if (list_is_last(&va->list, &vmap_area_list))
 		show_purge_info(m);
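The show_numa_info() hunks above exploit the fact that an area backed by order-N blocks stores 1 << N consecutive entries in pages[] that all belong to the same node, so only the first page of each block needs to be inspected. The sketch below is a stand-alone userspace illustration of that counting step, not kernel code: the page_node[] array and NR_NODES are invented stand-ins for page_to_nid() and the node count.

#include <stdio.h>

#define NR_NODES	2

int main(void)
{
	/* Node of each base page: one order-2 block (4 pages) per node. */
	unsigned int page_node[] = { 0, 0, 0, 0, 1, 1, 1, 1 };
	unsigned int nr_pages = 8, page_order = 2;
	unsigned int step = 1U << page_order;
	unsigned int counters[NR_NODES] = { 0 };
	unsigned int nr;

	/* Visit only the first page of each high-order block. */
	for (nr = 0; nr < nr_pages; nr += step)
		counters[page_node[nr]] += step;

	for (nr = 0; nr < NR_NODES; nr++)
		printf("N%u=%u\n", nr, counters[nr]);
	return 0;
}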