Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 218
 1 file changed, 145 insertions(+), 73 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100f1e63..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -155,16 +155,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
  * while devices are suspended. To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
  */
 
 static gfp_t saved_gfp_mask;
 
 void pm_restore_gfp_mask(void)
 {
-        WARN_ON(!mutex_is_locked(&pm_mutex));
+        WARN_ON(!mutex_is_locked(&system_transition_mutex));
         if (saved_gfp_mask) {
                 gfp_allowed_mask = saved_gfp_mask;
                 saved_gfp_mask = 0;
@@ -173,7 +174,7 @@ void pm_restore_gfp_mask(void)
 
 void pm_restrict_gfp_mask(void)
 {
-        WARN_ON(!mutex_is_locked(&pm_mutex));
+        WARN_ON(!mutex_is_locked(&system_transition_mutex));
         WARN_ON(saved_gfp_mask);
         saved_gfp_mask = gfp_allowed_mask;
         gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
@@ -2908,10 +2909,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
         if (!static_branch_likely(&vm_numa_stat_key))
                 return;
 
-        if (z->node != numa_node_id())
+        if (zone_to_nid(z) != numa_node_id())
                 local_stat = NUMA_OTHER;
 
-        if (z->node == preferred_zone->node)
+        if (zone_to_nid(z) == zone_to_nid(preferred_zone))
                 __inc_numa_state(z, NUMA_HIT);
         else {
                 __inc_numa_state(z, NUMA_MISS);
@@ -4164,11 +4165,12 @@ retry:
                 alloc_flags = reserve_flags;
 
         /*
-         * Reset the zonelist iterators if memory policies can be ignored.
-         * These allocations are high priority and system rather than user
-         * orientated.
+         * Reset the nodemask and zonelist iterators if memory policies can be
+         * ignored. These allocations are high priority and system rather than
+         * user oriented.
          */
         if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
+                ac->nodemask = NULL;
                 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->high_zoneidx, ac->nodemask);
         }
@@ -4402,19 +4404,15 @@ out:
 EXPORT_SYMBOL(__alloc_pages_nodemask);
 
 /*
- * Common helper functions.
+ * Common helper functions. Never use with __GFP_HIGHMEM because the returned
+ * address cannot represent highmem pages. Use alloc_pages and then kmap if
+ * you need to access high mem.
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
         struct page *page;
 
-        /*
-         * __get_free_pages() returns a virtual address, which cannot represent
-         * a highmem page
-         */
-        VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-        page = alloc_pages(gfp_mask, order);
+        page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
         if (!page)
                 return 0;
         return (unsigned long) page_address(page);
@@ -5280,7 +5278,7 @@ int local_memory_node(int node)
         z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
                                    gfp_zone(GFP_KERNEL),
                                    NULL);
-        return z->zone->node;
+        return zone_to_nid(z->zone);
 }
 #endif
 
@@ -5566,13 +5564,12 @@ static int zone_batchsize(struct zone *zone)
 
         /*
          * The per-cpu-pages pools are set to around 1000th of the
-         * size of the zone. But no more than 1/2 of a meg.
-         *
-         * OK, so we don't know how big the cache is. So guess.
+         * size of the zone.
          */
         batch = zone->managed_pages / 1024;
-        if (batch * PAGE_SIZE > 512 * 1024)
-                batch = (512 * 1024) / PAGE_SIZE;
+        /* But no more than a meg. */
+        if (batch * PAGE_SIZE > 1024 * 1024)
+                batch = (1024 * 1024) / PAGE_SIZE;
         batch /= 4;             /* We effectively *= 4 below */
         if (batch < 1)
                 batch = 1;
@@ -6123,7 +6120,7 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
         return usemapsize / 8;
 }
 
-static void __init setup_usemap(struct pglist_data *pgdat,
+static void __ref setup_usemap(struct pglist_data *pgdat,
                                 struct zone *zone,
                                 unsigned long zone_start_pfn,
                                 unsigned long zonesize)
@@ -6143,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
         unsigned int order;
 
@@ -6171,14 +6168,14 @@ void __paginginit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
-                                                unsigned long present_pages)
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
+                                                unsigned long present_pages)
 {
         unsigned long pages = spanned_pages;
 
@@ -6197,39 +6194,99 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
         return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
 }
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __paginginit free_area_init_core(struct pglist_data *pgdat)
-{
-        enum zone_type j;
-        int nid = pgdat->node_id;
-
-        pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
+static void pgdat_init_numabalancing(struct pglist_data *pgdat)
+{
         spin_lock_init(&pgdat->numabalancing_migrate_lock);
         pgdat->numabalancing_migrate_nr_pages = 0;
         pgdat->numabalancing_migrate_next_window = jiffies;
+}
+#else
+static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
 #endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
         spin_lock_init(&pgdat->split_queue_lock);
         INIT_LIST_HEAD(&pgdat->split_queue);
         pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
 #endif
-        init_waitqueue_head(&pgdat->kswapd_wait);
-        init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
 #ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
         init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
+
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
+{
+        pgdat_resize_init(pgdat);
+
+        pgdat_init_numabalancing(pgdat);
+        pgdat_init_split_queue(pgdat);
+        pgdat_init_kcompactd(pgdat);
+
+        init_waitqueue_head(&pgdat->kswapd_wait);
+        init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
         pgdat_page_ext_init(pgdat);
         spin_lock_init(&pgdat->lru_lock);
         lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+                                                        unsigned long remaining_pages)
+{
+        zone->managed_pages = remaining_pages;
+        zone_set_nid(zone, nid);
+        zone->name = zone_names[idx];
+        zone->zone_pgdat = NODE_DATA(nid);
+        spin_lock_init(&zone->lock);
+        zone_seqlock_init(zone);
+        zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+        enum zone_type z;
+        pg_data_t *pgdat = NODE_DATA(nid);
+
+        pgdat_init_internals(pgdat);
+        for (z = 0; z < MAX_NR_ZONES; z++)
+                zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+        enum zone_type j;
+        int nid = pgdat->node_id;
 
+        pgdat_init_internals(pgdat);
         pgdat->per_cpu_nodestats = &boot_nodestats;
 
         for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6277,15 +6334,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                  * when the bootmem allocator frees pages into the buddy system.
                  * And all highmem pages will be managed by the buddy system.
                  */
-                zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
-                zone->node = nid;
-#endif
-                zone->name = zone_names[j];
-                zone->zone_pgdat = pgdat;
-                spin_lock_init(&zone->lock);
-                zone_seqlock_init(zone);
-                zone_pcp_init(zone);
+                zone_init_internals(zone, j, nid, freesize);
 
                 if (!size)
                         continue;
@@ -6345,8 +6394,24 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 
-void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
-                                   unsigned long node_start_pfn, unsigned long *zholes_size)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+        /*
+         * We start only with one section of pages, more pages are added as
+         * needed until the rest of deferred pages are initialized.
+         */
+        pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+                                                pgdat->node_spanned_pages);
+        pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
+void __init free_area_init_node(int nid, unsigned long *zones_size,
+                                   unsigned long node_start_pfn,
+                                   unsigned long *zholes_size)
 {
         pg_data_t *pgdat = NODE_DATA(nid);
         unsigned long start_pfn = 0;
@@ -6370,20 +6435,12 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                                   zones_size, zholes_size);
 
         alloc_node_mem_map(pgdat);
+        pgdat_set_deferred_range(pgdat);
 
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-        /*
-         * We start only with one section of pages, more pages are added as
-         * needed until the rest of deferred pages are initialized.
-         */
-        pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
-                                         pgdat->node_spanned_pages);
-        pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
         free_area_init_core(pgdat);
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Only struct pages that are backed by physical memory are zeroed and
  * initialized by going through __init_single_page(). But, there are some
@@ -6391,7 +6448,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __paginginit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
         phys_addr_t start, end;
         unsigned long pfn;
@@ -6404,8 +6461,11 @@ void __paginginit zero_resv_unavail(void)
         pgcnt = 0;
         for_each_resv_unavail_range(i, &start, &end) {
                 for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
-                        if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+                        if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+                                pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+                                        + pageblock_nr_pages - 1;
                                 continue;
+                        }
                         mm_zero_struct_page(pfn_to_page(pfn));
                         pgcnt++;
                 }
@@ -6421,7 +6481,7 @@ void __paginginit zero_resv_unavail(void)
         if (pgcnt)
                 pr_info("Reserved but unavailable: %lld pages", pgcnt);
 }
-#endif /* CONFIG_HAVE_MEMBLOCK */
+#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
@@ -6847,6 +6907,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
         /* Initialise every node */
         mminit_verify_pageflags_layout();
         setup_nr_node_ids();
+        zero_resv_unavail();
         for_each_online_node(nid) {
                 pg_data_t *pgdat = NODE_DATA(nid);
                 free_area_init_node(nid, NULL,
@@ -6857,7 +6918,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                         node_set_state(nid, N_MEMORY);
                 check_for_memory(pgdat, nid);
         }
-        zero_resv_unavail();
 }
 
 static int __init cmdline_parse_core(char *p, unsigned long *core,
@@ -6939,9 +6999,21 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
         start = (void *)PAGE_ALIGN((unsigned long)start);
         end = (void *)((unsigned long)end & PAGE_MASK);
         for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+                struct page *page = virt_to_page(pos);
+                void *direct_map_addr;
+
+                /*
+                 * 'direct_map_addr' might be different from 'pos'
+                 * because some architectures' virt_to_page()
+                 * work with aliases. Getting the direct map
+                 * address ensures that we get a _writeable_
+                 * alias for the memset().
+                 */
+                direct_map_addr = page_address(page);
                 if ((unsigned int)poison <= 0xFF)
-                        memset(pos, poison, PAGE_SIZE);
-                free_reserved_page(virt_to_page(pos));
+                        memset(direct_map_addr, poison, PAGE_SIZE);
+
+                free_reserved_page(page);
         }
 
         if (pages && s)
@@ -7033,9 +7105,9 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 
 void __init free_area_init(unsigned long *zones_size)
 {
+        zero_resv_unavail();
         free_area_init_node(0, zones_size,
                         __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
-        zero_resv_unavail();
 }
 
 static int page_alloc_cpu_dead(unsigned int cpu)
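A few of the hunks above lend themselves to small standalone illustrations. First, the zero_resv_unavail() change (the hunk at old line 6404) makes the loop skip a whole pageblock at once when that block has no valid memmap, instead of testing every pfn in it. Below is a minimal userspace sketch of just that skip arithmetic; the ALIGN_DOWN helpers mirror the kernel's definitions, while PAGEBLOCK_NR_PAGES, the pfn range and the block_has_memmap() predicate are invented stand-ins for pfn_valid(), used only to make the example runnable.

#include <stdio.h>
#include <stdbool.h>

/* Same rounding helpers the kernel uses (include/linux/kernel.h). */
#define __ALIGN_MASK(x, mask)  (((x) + (mask)) & ~(mask))
#define ALIGN(x, a)            __ALIGN_MASK(x, (typeof(x))(a) - 1)
#define ALIGN_DOWN(x, a)       ALIGN((x) - ((a) - 1), (a))

/* Assumption: a 2 MiB pageblock with 4 KiB pages, i.e. 512 pfns per block. */
#define PAGEBLOCK_NR_PAGES 512UL

/* Hypothetical stand-in for pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)):
 * pretend the first two pageblocks have no memmap backing them. */
static bool block_has_memmap(unsigned long pfn)
{
        return ALIGN_DOWN(pfn, PAGEBLOCK_NR_PAGES) >= 2 * PAGEBLOCK_NR_PAGES;
}

int main(void)
{
        unsigned long pfn, visited = 0;

        /* Walk an arbitrary range the way the patched loop does. */
        for (pfn = 0; pfn < 4 * PAGEBLOCK_NR_PAGES; pfn++) {
                if (!block_has_memmap(pfn)) {
                        /*
                         * Jump straight to the last pfn of this pageblock;
                         * the for-loop's pfn++ then lands on the next block.
                         */
                        pfn = ALIGN_DOWN(pfn, PAGEBLOCK_NR_PAGES)
                                + PAGEBLOCK_NR_PAGES - 1;
                        continue;
                }
                visited++;      /* zero_resv_unavail() would zero the page here */
        }

        printf("pfns actually touched: %lu of %lu\n",
               visited, 4 * PAGEBLOCK_NR_PAGES);
        return 0;
}

Built with gcc (typeof is a GNU extension), the sketch only visits the pfns of the last two blocks, which is the point of the jump: the cost of the invalid regions becomes proportional to the number of pageblocks, not the number of pfns.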
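Second, the zone_batchsize() hunk raises the cap on the per-cpu-pages batch from 512 KiB to 1 MiB worth of pages. The sketch below reproduces only the arithmetic visible in the hunk (the 1/1024 sizing, the new 1 MiB clamp, the divide-by-four and the minimum of one); the power-of-two rounding that the real zone_batchsize() applies afterwards is not shown, and the 4 KiB page size and example zone sizes are assumptions.

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* assumption: 4 KiB pages */

/*
 * Mirror of the sizing logic visible in the hunk: the pcp pool is about
 * 1/1024th of the zone, clamped to 1 MiB (previously 512 KiB), then
 * divided by 4 because the caller effectively multiplies by 4 again.
 */
static unsigned long batch_for(unsigned long managed_pages)
{
        unsigned long batch = managed_pages / 1024;

        /* But no more than a meg. */
        if (batch * PAGE_SIZE > 1024 * 1024)
                batch = (1024 * 1024) / PAGE_SIZE;
        batch /= 4;             /* We effectively *= 4 below */
        if (batch < 1)
                batch = 1;
        return batch;
}

int main(void)
{
        /* Example zone sizes in pages: 128 MiB, 1 GiB, 16 GiB. */
        unsigned long zones[] = { 32768, 262144, 4194304 };

        for (int i = 0; i < 3; i++)
                printf("%8lu managed pages -> batch %lu\n",
                       zones[i], batch_for(zones[i]));
        return 0;
}

With these numbers the larger clamp is what changes the result: for zones of 1 GiB and above the batch doubles from 32 to 64 pages, which is the effect the hunk is after.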
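Finally, the largest hunk restructures free_area_init_core() by giving every #ifdef'd block its own helper (pgdat_init_numabalancing(), pgdat_init_split_queue(), pgdat_init_kcompactd()) with an empty stub in the #else branch, so that pgdat_init_internals() can call them unconditionally and be shared with the new free_area_init_core_hotplug(). Here is a toy sketch of that pattern outside the kernel, with an invented CONFIG_FEATURE_X macro and a made-up node_data struct standing in for pglist_data:

#include <stdio.h>

/* Toy stand-in for struct pglist_data; the field is illustrative only. */
struct node_data {
        int feature_x_counter;
};

/* #define CONFIG_FEATURE_X 1   -- flip this on to compile the real helper */

#ifdef CONFIG_FEATURE_X
static void node_init_feature_x(struct node_data *nd)
{
        /* Real initialisation lives here when the feature is compiled in. */
        nd->feature_x_counter = 0;
        printf("feature X initialised\n");
}
#else
/* Empty stub: callers need no #ifdef of their own. */
static void node_init_feature_x(struct node_data *nd) { (void)nd; }
#endif

/* Analogue of pgdat_init_internals(): one unconditional call site that can
 * now be shared by both the boot path and a hotplug path. */
static void node_init_internals(struct node_data *nd)
{
        node_init_feature_x(nd);
        printf("common initialisation done\n");
}

int main(void)
{
        struct node_data nd = { .feature_x_counter = -1 };

        node_init_internals(&nd);
        return 0;
}

The empty #else stub is what keeps the shared call site free of preprocessor conditionals; in the patch that is precisely what lets the same pgdat_init_internals() serve both the early-boot initialiser and the memory-hotplug one.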