diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/fpu-internal.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/i387.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 125 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 11 |
10 files changed, 141 insertions, 35 deletions
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 4d0bda7b11e3..c49a613c6452 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk) * Forget coprocessor state.. */ preempt_disable(); - tsk->fpu_counter = 0; + tsk->thread.fpu_counter = 0; __drop_fpu(tsk); clear_used_math(); preempt_enable(); @@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * or if the past 5 consecutive context-switches used math. */ fpu.preload = tsk_used_math(new) && (use_eager_fpu() || - new->fpu_counter > 5); + new->thread.fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { - new->fpu_counter++; + new->thread.fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); } else if (!use_eager_fpu()) stts(); } else { - old->fpu_counter = 0; + old->thread.fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { - new->fpu_counter++; + new->thread.fpu_counter++; if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 987c75ecc334..7b034a4057f9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -488,6 +488,15 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. 
This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; }; /* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5d576ab34403..e8368c6dd2a2 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk) __save_init_fpu(tsk); __thread_fpu_end(tsk); } else - tsk->fpu_counter = 0; + tsk->thread.fpu_counter = 0; preempt_enable(); } EXPORT_SYMBOL(unlazy_fpu); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 216a4d754b0c..18be189368bb 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -49,7 +49,7 @@ void *module_alloc(unsigned long size) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, - -1, __builtin_return_address(0)); + NUMA_NO_NODE, __builtin_return_address(0)); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c2ec1aa6d454..6f1236c29c4b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); return 0; @@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(current_pt_regs()); - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 45ab4d6fc8a7..10fe4c189621 100644 --- 
a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); - p->fpu_counter = 0; + p->thread.fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 918d489fa53d..cb233bc9dee3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1121,8 +1121,6 @@ void __init setup_arch(char **cmdline_p) acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); #endif - reserve_crashkernel(); - vsmp_init(); io_delay_init(); @@ -1135,6 +1133,13 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); initmem_init(); + + /* + * Reserve memory for crash kernel after SRAT is parsed so that it + * won't consume hotpluggable memory. + */ + reserve_crashkernel(); + memblock_find_dma_reserve(); #ifdef CONFIG_KVM_GUEST diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 729aa779ff75..996ce2313ce6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -653,7 +653,7 @@ void math_state_restore(void) return; } - tsk->fpu_counter++; + tsk->thread.fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ce32017c5e38..f97130618113 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -53,12 +53,12 @@ __ref void *alloc_low_pages(unsigned int num) if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); + panic("alloc_low_pages: ran out of memory"); ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, max_pfn_mapped << PAGE_SHIFT, PAGE_SIZE * num , PAGE_SIZE); if (!ret) - panic("alloc_low_page: can not alloc memory"); + panic("alloc_low_pages: can not alloc 
memory"); memblock_reserve(ret, PAGE_SIZE * num); pfn = ret >> PAGE_SHIFT; } else { @@ -418,27 +418,27 @@ static unsigned long __init get_new_step_size(unsigned long step_size) return step_size << 5; } -void __init init_mem_mapping(void) +/** + * memory_map_top_down - Map [map_start, map_end) top down + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in top-down. That said, the page tables + * will be allocated at the end of the memory, and we map the + * memory in top-down. + */ +static void __init memory_map_top_down(unsigned long map_start, + unsigned long map_end) { - unsigned long end, real_end, start, last_start; + unsigned long real_end, start, last_start; unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; unsigned long new_mapped_ram_size; - probe_page_size_mask(); - -#ifdef CONFIG_X86_64 - end = max_pfn << PAGE_SHIFT; -#else - end = max_low_pfn << PAGE_SHIFT; -#endif - - /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first.*/ - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); + addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ @@ -453,13 +453,13 @@ void __init init_mem_mapping(void) * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages * for page table. 
*/ - while (last_start > ISA_END_ADDRESS) { + while (last_start > map_start) { if (last_start > step_size) { start = round_down(last_start - 1, step_size); - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; + if (start < map_start) + start = map_start; } else - start = ISA_END_ADDRESS; + start = map_start; new_mapped_ram_size = init_range_memory_mapping(start, last_start); last_start = start; @@ -470,8 +470,89 @@ void __init init_mem_mapping(void) mapped_ram_size += new_mapped_ram_size; } - if (real_end < end) - init_range_memory_mapping(real_end, end); + if (real_end < map_end) + init_range_memory_mapping(real_end, map_end); +} + +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, new_mapped_ram_size, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. 
+ */ + while (start < map_end) { + if (map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else + next = map_end; + + new_mapped_ram_size = init_range_memory_mapping(start, next); + start = next; + + if (new_mapped_ram_size > mapped_ram_size) + step_size = get_new_step_size(step_size); + mapped_ram_size += new_mapped_ram_size; + } +} + +void __init init_mem_mapping(void) +{ + unsigned long end; + + probe_page_size_mask(); + +#ifdef CONFIG_X86_64 + end = max_pfn << PAGE_SHIFT; +#else + end = max_low_pfn << PAGE_SHIFT; +#endif + + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); + + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + memory_map_bottom_up(kernel_end, end); + memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); + } else { + memory_map_top_down(ISA_END_ADDRESS, end); + } #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8bf93bae1f13..24aec58d6afd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -567,6 +567,17 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) return ret; + + /* + * We reset memblock back to the top-down direction + * here because if we configured ACPI_NUMA, we have + * parsed SRAT in init_func(). It is ok to have the + * reset here even if we didn't configure ACPI_NUMA + * or acpi numa init fails and falls back to dummy + * numa init. 
+ */ + memblock_set_bottom_up(false); + ret = numa_cleanup_meminfo(&numa_meminfo); if (ret < 0) return ret; |