aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds2010-03-03 08:15:05 -0800
committerLinus Torvalds2010-03-03 08:15:05 -0800
commita626b46e17d0762d664ce471d40bc506b6e721ab (patch)
tree445f6ac655ea9247d2e27529f23ba02d0991fec0 /mm
parentc1dcb4bb1e3e16e9baee578d9bb040e5fba1063e (diff)
parentdce46a04d55d6358d2d4ab44a4946a19f9425fe2 (diff)
Merge branch 'x86-bootmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-bootmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (30 commits) early_res: Need to save the allocation name in drop_range_partial() sparsemem: Fix compilation on PowerPC early_res: Add free_early_partial() x86: Fix non-bootmem compilation on PowerPC core: Move early_res from arch/x86 to kernel/ x86: Add find_fw_memmap_area Move round_up/down to kernel.h x86: Make 32bit support NO_BOOTMEM early_res: Enhance check_and_double_early_res x86: Move back find_e820_area to e820.c x86: Add find_early_area_size x86: Separate early_res related code from e820.c x86: Move bios page reserve early to head32/64.c sparsemem: Put mem map for one node together. sparsemem: Put usemap for one node together x86: Make 64 bit use early_res instead of bootmem before slab x86: Only call dma32_reserve_bootmem 64bit !CONFIG_NUMA x86: Make early_node_mem get mem > 4 GB if possible x86: Dynamically increase early_res array size x86: Introduce max_early_res and early_res_count ...
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/bootmem.c195
-rw-r--r--mm/page_alloc.c61
-rw-r--r--mm/sparse-vmemmap.c76
-rw-r--r--mm/sparse.c196
5 files changed, 508 insertions, 24 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index d34c2b971032..9c61158308dc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME
config SPARSEMEM_VMEMMAP_ENABLE
bool
+config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+ def_bool y
+ depends on SPARSEMEM && X86_64
+
config SPARSEMEM_VMEMMAP
bool "Sparse Memory virtual memmap"
depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 7d1486875e1c..d7c791ef0036 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -13,6 +13,7 @@
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/kmemleak.h>
+#include <linux/range.h>
#include <asm/bug.h>
#include <asm/io.h>
@@ -32,6 +33,7 @@ unsigned long max_pfn;
unsigned long saved_max_pfn;
#endif
+#ifndef CONFIG_NO_BOOTMEM
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
min_low_pfn = start;
return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
}
-
+#endif
/*
* free_bootmem_late - free bootmem pages directly to page allocator
* @addr: starting address of the range
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
}
}
+#ifdef CONFIG_NO_BOOTMEM
+static void __init __free_pages_memory(unsigned long start, unsigned long end)
+{
+ int i;
+ unsigned long start_aligned, end_aligned;
+ int order = ilog2(BITS_PER_LONG);
+
+ start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+ end_aligned = end & ~(BITS_PER_LONG - 1);
+
+ if (end_aligned <= start_aligned) {
+#if 1
+ printk(KERN_DEBUG " %lx - %lx\n", start, end);
+#endif
+ for (i = start; i < end; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+
+ return;
+ }
+
+#if 1
+ printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
+ start, start_aligned, end_aligned, end);
+#endif
+ for (i = start; i < start_aligned; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+
+ for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
+ __free_pages_bootmem(pfn_to_page(i), order);
+
+ for (i = end_aligned; i < end; i++)
+ __free_pages_bootmem(pfn_to_page(i), 0);
+}
+
+unsigned long __init free_all_memory_core_early(int nodeid)
+{
+ int i;
+ u64 start, end;
+ unsigned long count = 0;
+ struct range *range = NULL;
+ int nr_range;
+
+ nr_range = get_free_all_memory_range(&range, nodeid);
+
+ for (i = 0; i < nr_range; i++) {
+ start = range[i].start;
+ end = range[i].end;
+ count += end - start;
+ __free_pages_memory(start, end);
+ }
+
+ return count;
+}
+#else
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
int aligned;
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
return count;
}
+#endif
/**
* free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
register_page_bootmem_info_node(pgdat);
+#ifdef CONFIG_NO_BOOTMEM
+ /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
+ return 0;
+#else
return free_all_bootmem_core(pgdat->bdata);
+#endif
}
/**
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
*/
unsigned long __init free_all_bootmem(void)
{
+#ifdef CONFIG_NO_BOOTMEM
+ return free_all_memory_core_early(NODE_DATA(0)->node_id);
+#else
return free_all_bootmem_core(NODE_DATA(0)->bdata);
+#endif
}
+#ifndef CONFIG_NO_BOOTMEM
static void __init __free(bootmem_data_t *bdata,
unsigned long sidx, unsigned long eidx)
{
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
}
BUG();
}
+#endif
/**
* free_bootmem_node - mark a page range as usable
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
+#ifdef CONFIG_NO_BOOTMEM
+ free_early(physaddr, physaddr + size);
+#if 0
+ printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
+#endif
+#else
unsigned long start, end;
kmemleak_free_part(__va(physaddr), size);
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
end = PFN_DOWN(physaddr + size);
mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
+#endif
}
/**
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
*/
void __init free_bootmem(unsigned long addr, unsigned long size)
{
+#ifdef CONFIG_NO_BOOTMEM
+ free_early(addr, addr + size);
+#if 0
+ printk(KERN_DEBUG "free %lx %lx\n", addr, size);
+#endif
+#else
unsigned long start, end;
kmemleak_free_part(__va(addr), size);
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
end = PFN_DOWN(addr + size);
mark_bootmem(start, end, 0, 0);
+#endif
}
/**
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
+#ifdef CONFIG_NO_BOOTMEM
+ panic("no bootmem");
+ return 0;
+#else
unsigned long start, end;
start = PFN_DOWN(physaddr);
end = PFN_UP(physaddr + size);
return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
+#endif
}
/**
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
+#ifdef CONFIG_NO_BOOTMEM
+ panic("no bootmem");
+ return 0;
+#else
unsigned long start, end;
start = PFN_DOWN(addr);
end = PFN_UP(addr + size);
return mark_bootmem(start, end, 1, flags);
+#endif
}
+#ifndef CONFIG_NO_BOOTMEM
static unsigned long __init align_idx(struct bootmem_data *bdata,
unsigned long idx, unsigned long step)
{
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
#endif
return NULL;
}
+#endif
static void * __init ___alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal,
unsigned long limit)
{
+#ifdef CONFIG_NO_BOOTMEM
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc(size, GFP_NOWAIT);
+
+restart:
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
+
+ if (ptr)
+ return ptr;
+
+ if (goal != 0) {
+ goal = 0;
+ goto restart;
+ }
+
+ return NULL;
+#else
bootmem_data_t *bdata;
void *region;
@@ -613,6 +727,7 @@ restart:
}
return NULL;
+#endif
}
/**
@@ -631,7 +746,13 @@ restart:
void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
unsigned long goal)
{
- return ___alloc_bootmem_nopanic(size, align, goal, 0);
+ unsigned long limit = 0;
+
+#ifdef CONFIG_NO_BOOTMEM
+ limit = -1UL;
+#endif
+
+ return ___alloc_bootmem_nopanic(size, align, goal, limit);
}
static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
void * __init __alloc_bootmem(unsigned long size, unsigned long align,
unsigned long goal)
{
- return ___alloc_bootmem(size, align, goal, 0);
+ unsigned long limit = 0;
+
+#ifdef CONFIG_NO_BOOTMEM
+ limit = -1UL;
+#endif
+
+ return ___alloc_bootmem(size, align, goal, limit);
}
+#ifndef CONFIG_NO_BOOTMEM
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
return ___alloc_bootmem(size, align, goal, limit);
}
+#endif
/**
* __alloc_bootmem_node - allocate boot memory from a specific node
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+#ifdef CONFIG_NO_BOOTMEM
+ return __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+#else
return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+#endif
+}
+
+void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+#ifdef MAX_DMA32_PFN
+ unsigned long end_pfn;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ /* update goal according ...MAX_DMA32_PFN */
+ end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+ if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
+ (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
+ void *ptr;
+ unsigned long new_goal;
+
+ new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
+#ifdef CONFIG_NO_BOOTMEM
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ new_goal, -1ULL);
+#else
+ ptr = alloc_bootmem_core(pgdat->bdata, size, align,
+ new_goal, 0);
+#endif
+ if (ptr)
+ return ptr;
+ }
+#endif
+
+ return __alloc_bootmem_node(pgdat, size, align, goal);
+
}
#ifdef CONFIG_SPARSEMEM
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
void * __init alloc_bootmem_section(unsigned long size,
unsigned long section_nr)
{
+#ifdef CONFIG_NO_BOOTMEM
+ unsigned long pfn, goal, limit;
+
+ pfn = section_nr_to_pfn(section_nr);
+ goal = pfn << PAGE_SHIFT;
+ limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
+
+ return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
+ SMP_CACHE_BYTES, goal, limit);
+#else
bootmem_data_t *bdata;
unsigned long pfn, goal, limit;
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size,
bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
+#endif
}
#endif
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+#ifdef CONFIG_NO_BOOTMEM
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+#else
ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
if (ptr)
return ptr;
ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+#endif
if (ptr)
return ptr;
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+#ifdef CONFIG_NO_BOOTMEM
+ return __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+#else
return ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
+#endif
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9a7aaae07ab4..a6b17aa4740b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3374,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid,
}
}
+int __init add_from_early_node_map(struct range *range, int az,
+ int nr_range, int nid)
+{
+ int i;
+ u64 start, end;
+
+ /* need to go over early_node_map to find out good range for node */
+ for_each_active_range_index_in_nid(i, nid) {
+ start = early_node_map[i].start_pfn;
+ end = early_node_map[i].end_pfn;
+ nr_range = add_range(range, az, nr_range, start, end);
+ }
+ return nr_range;
+}
+
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+ u64 goal, u64 limit)
+{
+ int i;
+ void *ptr;
+
+ /* need to go over early_node_map to find out good range for node */
+ for_each_active_range_index_in_nid(i, nid) {
+ u64 addr;
+ u64 ei_start, ei_last;
+
+ ei_last = early_node_map[i].end_pfn;
+ ei_last <<= PAGE_SHIFT;
+ ei_start = early_node_map[i].start_pfn;
+ ei_start <<= PAGE_SHIFT;
+ addr = find_early_area(ei_start, ei_last,
+ goal, limit, size, align);
+
+ if (addr == -1ULL)
+ continue;
+
+#if 0
+ printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
+ nid,
+ ei_start, ei_last, goal, limit, size,
+ align, addr);
+#endif
+
+ ptr = phys_to_virt(addr);
+ memset(ptr, 0, size);
+ reserve_early_without_check(addr, addr + size, "BOOTMEM");
+ return ptr;
+ }
+
+ return NULL;
+}
+#endif
+
+
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
@@ -4406,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
+struct pglist_data __refdata contig_page_data = {
+#ifndef CONFIG_NO_BOOTMEM
+ .bdata = &bootmem_node_data[0]
+#endif
+ };
EXPORT_SYMBOL(contig_page_data);
#endif
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d9714bdcb4a3..392b9bb5bc01 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
unsigned long align,
unsigned long goal)
{
- return __alloc_bootmem_node(NODE_DATA(node), size, align, goal);
+ return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
}
+static void *vmemmap_buf;
+static void *vmemmap_buf_end;
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
__pa(MAX_DMA_ADDRESS));
}
+/* need to make sure size is all the same during early stage */
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
+{
+ void *ptr;
+
+ if (!vmemmap_buf)
+ return vmemmap_alloc_block(size, node);
+
+ /* take the from buf */
+ ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
+ if (ptr + size > vmemmap_buf_end)
+ return vmemmap_alloc_block(size, node);
+
+ vmemmap_buf = ptr + size;
+
+ return ptr;
+}
+
void __meminit vmemmap_verify(pte_t *pte, int node,
unsigned long start, unsigned long end)
{
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
pte_t *pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
pte_t entry;
- void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+ void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
return NULL;
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
return map;
}
+
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+ unsigned long pnum_begin,
+ unsigned long pnum_end,
+ unsigned long map_count, int nodeid)
+{
+ unsigned long pnum;
+ unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+ void *vmemmap_buf_start;
+
+ size = ALIGN(size, PMD_SIZE);
+ vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
+ PMD_SIZE, __pa(MAX_DMA_ADDRESS));
+
+ if (vmemmap_buf_start) {
+ vmemmap_buf = vmemmap_buf_start;
+ vmemmap_buf_end = vmemmap_buf_start + size * map_count;
+ }
+
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ struct mem_section *ms;
+
+ if (!present_section_nr(pnum))
+ continue;
+
+ map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ if (map_map[pnum])
+ continue;
+ ms = __nr_to_section(pnum);
+ printk(KERN_ERR "%s: sparsemem memory map backing failed "
+ "some memory will not be available.\n", __func__);
+ ms->section_mem_map = 0;
+ }
+
+ if (vmemmap_buf_start) {
+ /* need to free left buf */
+#ifdef CONFIG_NO_BOOTMEM
+ free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end));
+ if (vmemmap_buf_start < vmemmap_buf) {
+ char name[15];
+
+ snprintf(name, sizeof(name), "MEMMAP %d", nodeid);
+ reserve_early_without_check(__pa(vmemmap_buf_start),
+ __pa(vmemmap_buf), name);
+ }
+#else
+ free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
+#endif
+ vmemmap_buf = NULL;
+ vmemmap_buf_end = NULL;
+ }
+}
diff --git a/mm/sparse.c b/mm/sparse.c
index 6ce4aab69e99..22896d589133 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void)
#ifdef CONFIG_MEMORY_HOTREMOVE
static unsigned long * __init
-sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
+ unsigned long count)
{
unsigned long section_nr;
@@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
* this problem.
*/
section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
- return alloc_bootmem_section(usemap_size(), section_nr);
+ return alloc_bootmem_section(usemap_size() * count, section_nr);
}
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
}
#else
static unsigned long * __init
-sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
+sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
+ unsigned long count)
{
return NULL;
}
@@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
+static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
+ unsigned long pnum_begin,
+ unsigned long pnum_end,
+ unsigned long usemap_count, int nodeid)
{
- unsigned long *usemap;
- struct mem_section *ms = __nr_to_section(pnum);
- int nid = sparse_early_nid(ms);
-
- usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
- if (usemap)
- return usemap;
+ void *usemap;
+ unsigned long pnum;
+ int size = usemap_size();
- usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+ usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
+ usemap_count);
if (usemap) {
- check_usemap_section_nr(nid, usemap);
- return usemap;
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ if (!present_section_nr(pnum))
+ continue;
+ usemap_map[pnum] = usemap;
+ usemap += size;
+ }
+ return;
}
- /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
- nid = 0;
+ usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
+ if (usemap) {
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ if (!present_section_nr(pnum))
+ continue;
+ usemap_map[pnum] = usemap;
+ usemap += size;
+ check_usemap_section_nr(nodeid, usemap_map[pnum]);
+ }
+ return;
+ }
printk(KERN_WARNING "%s: allocation failed\n", __func__);
- return NULL;
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
@@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
return map;
}
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+ unsigned long pnum_begin,
+ unsigned long pnum_end,
+ unsigned long map_count, int nodeid)
+{
+ void *map;
+ unsigned long pnum;
+ unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+
+ map = alloc_remap(nodeid, size * map_count);
+ if (map) {
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ if (!present_section_nr(pnum))
+ continue;
+ map_map[pnum] = map;
+ map += size;
+ }
+ return;
+ }
+
+ size = PAGE_ALIGN(size);
+ map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count);
+ if (map) {
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ if (!present_section_nr(pnum))
+ continue;
+ map_map[pnum] = map;
+ map += size;
+ }
+ return;
+ }
+
+ /* fallback */
+ for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+ struct mem_section *ms;
+
+ if (!present_section_nr(pnum))
+ continue;
+ map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ if (map_map[pnum])
+ continue;
+ ms = __nr_to_section(pnum);
+ printk(KERN_ERR "%s: sparsemem memory map backing failed "
+ "some memory will not be available.\n", __func__);
+ ms->section_mem_map = 0;
+ }
+}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+ unsigned long pnum_begin,
+ unsigned long pnum_end,
+ unsigned long map_count, int nodeid)
+{
+ sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
+ map_count, nodeid);
+}
+#else
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
struct page *map;
@@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
ms->section_mem_map = 0;
return NULL;
}
+#endif
void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
{
}
+
/*
* Allocate the accumulated non-linear sections, allocate a mem_map
* for each and record the physical to section mapping.
@@ -407,6 +481,14 @@ void __init sparse_init(void)
unsigned long *usemap;
unsigned long **usemap_map;
int size;
+ int nodeid_begin = 0;
+ unsigned long pnum_begin = 0;
+ unsigned long usemap_count;
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+ unsigned long map_count;
+ int size2;
+ struct page **map_map;
+#endif
/*
* map is using big page (aka 2M in x86 64 bit)
@@ -425,10 +507,81 @@ void __init sparse_init(void)
panic("can not allocate usemap_map\n");
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+
if (!present_section_nr(pnum))
continue;
- usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
+ ms = __nr_to_section(pnum);
+ nodeid_begin = sparse_early_nid(ms);
+ pnum_begin = pnum;
+ break;
}
+ usemap_count = 1;
+ for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+ int nodeid;
+
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ nodeid = sparse_early_nid(ms);
+ if (nodeid == nodeid_begin) {
+ usemap_count++;
+ continue;
+ }
+ /* ok, we need to take cake of from pnum_begin to pnum - 1*/
+ sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
+ usemap_count, nodeid_begin);
+ /* new start, update count etc*/
+ nodeid_begin = nodeid;
+ pnum_begin = pnum;
+ usemap_count = 1;
+ }
+ /* ok, last chunk */
+ sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
+ usemap_count, nodeid_begin);
+
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+ size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
+ map_map = alloc_bootmem(size2);
+ if (!map_map)
+ panic("can not allocate map_map\n");
+
+ for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ nodeid_begin = sparse_early_nid(ms);
+ pnum_begin = pnum;
+ break;
+ }
+ map_count = 1;
+ for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+ int nodeid;
+
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ nodeid = sparse_early_nid(ms);
+ if (nodeid == nodeid_begin) {
+ map_count++;
+ continue;
+ }
+ /* ok, we need to take cake of from pnum_begin to pnum - 1*/
+ sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
+ map_count, nodeid_begin);
+ /* new start, update count etc*/
+ nodeid_begin = nodeid;
+ pnum_begin = pnum;
+ map_count = 1;
+ }
+ /* ok, last chunk */
+ sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
+ map_count, nodeid_begin);
+#endif
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
if (!present_section_nr(pnum))
@@ -438,7 +591,11 @@ void __init sparse_init(void)
if (!usemap)
continue;
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+ map = map_map[pnum];
+#else
map = sparse_early_mem_map_alloc(pnum);
+#endif
if (!map)
continue;
@@ -448,6 +605,9 @@ void __init sparse_init(void)
vmemmap_populate_print_last();
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+ free_bootmem(__pa(map_map), size2);
+#endif
free_bootmem(__pa(usemap_map), size);
}