diff options
author | Ingo Molnar | 2009-01-18 18:15:49 +0100 |
---|---|---|
committer | Ingo Molnar | 2009-01-18 18:15:49 +0100 |
commit | af37501c792107c2bde1524bdae38d9a247b841a (patch) | |
tree | b50ee90d29e72956b8b7d8d19677fe5996755d49 /arch/x86 | |
parent | d859e29fe34cb833071b20aef860ee94fbad9bb2 (diff) | |
parent | 99937d6455cea95405ac681c86a857d0fcd530bd (diff) |
Merge branch 'core/percpu' into perfcounters/core
Conflicts:
arch/x86/include/asm/pda.h
We merge tip/core/percpu into tip/perfcounters/core because of a
semantic and contextual conflict: the former eliminates the PDA,
while the latter extends it with apic_perf_irqs field.
Resolve the conflict by moving the new field to the irq_cpustat
structure on 64-bit too.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
80 files changed, 1128 insertions, 1203 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 3c14ed07dc4e..01e7c4c5c7fe 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: @@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 000000000000..82f613c607ce --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index e02a359d2aa5..02b47a603fc8 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -3,6 +3,9 @@ /* * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H @@ -53,7 +56,8 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df6039..f03b23e32864 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,20 @@ #include <linux/nodemask.h> #include <linux/percpu.h> +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define safe_smp_processor_id() 0 +#define stack_smp_processor_id() 0 + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; @@ -17,4 +31,11 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 000000000000..26c6dad90479 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,28 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include <linux/cpumask.h> + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d672..c68c361697e1 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include <linux/compiler.h> #include <asm/percpu.h> +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include <asm/pda.h> - -struct task_struct; static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return percpu_read(current_task); } -#else /* __ASSEMBLY__ */ - -#include <asm/asm-offsets.h> -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 7a07897a7888..7838276bfe51 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -20,6 +20,9 @@ typedef struct { DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d56704..42930b279215 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,37 @@ #include <linux/threads.h> #include <linux/irq.h> -#include <asm/pda.h> #include <asm/apic.h> +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int apic_perf_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7a1f44ac1f17..08ec793aa043 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index 86afd7473457..d7ed33ee94e9 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h @@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return x86_read_percpu(irq_regs); + return percpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); + percpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 21a0b92027f5..1554d0236e03 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -110,6 +110,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include <asm/apicnum.h> /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -117,11 +119,12 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index ceb013660146..89897a6a65b9 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } +#ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ static inline void inquire_remote_apic(int apicid) { diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 7e98ce1d2c0e..08b53454f831 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h @@ -4,8 +4,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e9540a..c4572505ab3e 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -1,13 +1,11 @@ #ifndef _ASM_X86_MMU_CONTEXT_64_H #define _ASM_X86_MMU_CONTEXT_64_H -#include <asm/pda.h> - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 59568bc4767f..4a7f96d7c188 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,17 +24,18 @@ # endif #endif -struct intel_mp_floating { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index cb988aab716d..14080d22edb3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -58,15 +58,15 @@ struct mtrr_gentry { #endif /* !__i386__ */ struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; + __u32 base_lo; + __u32 base_hi; + __u32 mask_lo; + __u32 mask_hi; }; /* In the Intel processor's MTRR interface, the MTRR type is always held in an 8 bit field: */ -typedef u8 mtrr_type; +typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f0..e27fdbe5f9e4 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aedc..c26c6bf4da00 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -244,7 +244,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 90a8d9d4206b..c31ca048a901 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -5,134 +5,41 @@ #include <linux/stddef.h> #include <linux/types.h> #include <linux/cache.h> +#include <linux/threads.h> #include <asm/page.h> +#include <asm/percpu.h> /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ - unsigned long kernelstack; /* 16 top of kernel stack for current */ - unsigned long oldrsp; /* 24 user rsp for system call */ - int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ + unsigned long unused1; + unsigned long unused2; + unsigned long unused3; + unsigned long unused4; + int unused5; + unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - char *irqstackptr; - short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ - short mmu_state; - short isidle; - struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned apic_perf_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda **_cpu_pda; +DECLARE_PER_CPU(struct x8664_pda, __pda); extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) +#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) +#define read_pda(field) percpu_read(__pda.field) +#define write_pda(field, val) percpu_write(__pda.field, val) +#define add_pda(field, val) percpu_add(__pda.field, val) +#define sub_pda(field, val) percpu_sub(__pda.field, val) +#define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) + x86_test_and_clear_bit_percpu(bit, __pda.field) #endif -#define PDA_STACKOFFSET (5*8) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba63..165d5272ece1 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -2,53 +2,12 @@ #define _ASM_X86_PERCPU_H #ifdef CONFIG_X86_64 -#include <linux/compiler.h> - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include <asm/pda.h> - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - +#define __percpu_seg gs +#define __percpu_mov_op movq +#else +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#include <asm-generic/percpu.h> - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -65,47 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ -#ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" - -#else /* !SMP */ +#include <linux/stringify.h> -#define __percpu_seg "" - -#endif /* SMP */ - -#include <asm-generic/percpu.h> - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); +#ifdef CONFIG_SMP +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x +#define __my_cpu_offset percpu_read(this_cpu_off) +#else +#define __percpu_arg(x) "%" #x +#endif /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ @@ -120,20 +58,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "r" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -143,17 +86,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -162,13 +110,36 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) + +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ + old__; \ +}) + +#include <asm-generic/percpu.h> + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP @@ -195,9 +166,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83e69f4a37f0..06bbcbd66e9c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -341,6 +341,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +static inline int is_new_memtype_allowed(unsigned long flags, + unsigned long new_flags) +{ + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + */ + if ((flags == _PAGE_CACHE_UC_MINUS && + new_flags == _PAGE_CACHE_WB) || + (flags == _PAGE_CACHE_WC && + new_flags == _PAGE_CACHE_WB)) { + return 0; + } + + return 1; +} + #ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2e..f511246fa6cd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a3..536949749bc2 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 19953df61c52..68636e767a91 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,32 +17,7 @@ #endif #include <asm/pda.h> #include <asm/thread_info.h> - -#ifdef CONFIG_X86_64 - -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; - -#else /* CONFIG_X86_32 */ - -extern cpumask_t cpu_callin_map; -extern cpumask_t cpu_callout_map; -extern cpumask_t cpu_initialized; -extern cpumask_t cpu_sibling_setup_map; - -#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) -#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) -#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) -#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) - -#endif /* CONFIG_X86_32 */ - -extern void (*mtrr_hook)(void); -extern void zap_low_mappings(void); - -extern int __cpuinit get_local_pda(int cpu); +#include <asm/cpumask.h> extern int smp_num_siblings; extern unsigned int num_processors; @@ -50,9 +25,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -167,8 +140,6 @@ void play_dead_common(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -177,10 +148,6 @@ static inline int num_booting_cpus(void) { return cpumask_weight(cpu_callout_mask); } -#else -static inline void prefill_possible_map(void) -{ -} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; @@ -191,11 +158,11 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ @@ -205,10 +172,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -251,11 +214,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1a..d1dc27dba36d 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -106,7 +106,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index efdf93820aed..f38488989db7 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -196,25 +196,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include <asm/pda.h> +#include <asm/percpu.h> +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} +DECLARE_PER_CPU(unsigned long, kernel_stack); -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) +static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -222,8 +218,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb549116..d3539f998f88 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, @@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab27..ffea1fe03a99 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); @@ -102,10 +103,7 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 780ba0ab94f9..90f06c25221d 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b2..74e6393bfddb 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern int uv_flush_tlb_others(struct cpumask *, + struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f438..4cb5964f1499 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mp_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mp_apicid, used); + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); } if (!test_bit(id, used)) return id; @@ -945,47 +945,47 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mp_type = MP_IOAPIC; - mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mp_apicaddr = address; + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); + mp_ioapics[idx].apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mp_apicver = 0; + mp_ioapics[idx].apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, - mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; } -static void assign_to_mp_irq(struct mp_config_intsrc *m, - struct mp_config_intsrc *mp_irq) +static void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) { - memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); } -static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, - struct mp_config_intsrc *m) +static int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) { - return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); } -static void save_mp_irq(struct mp_config_intsrc *m) +static void save_mp_irq(struct mpc_intsrc *m) { int i; @@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { int ioapic; int pin; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (trigger << 2) | polarity; - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ - mp_irq.mp_dstirq = pin; /* INTIN# */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); } @@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) int i; int ioapic; unsigned int dstapic; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* @@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void) ioapic = mp_find_ioapic(0); if (ioapic < 0) return; - dstapic = mp_ioapics[ioapic].mp_apicid; + dstapic = mp_ioapics[ioapic].apicid; /* * Use the default configuration for the IRQs 0-15. Unless @@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mp_config_intsrc *irq = mp_irqs + idx; + struct mpc_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mp_srcbus == MP_ISA_BUS - && irq->mp_srcbusirq == i) + if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if (irq->mp_dstapic == dstapic && - irq->mp_dstirq == i) + if (irq->dstapic == dstapic && irq->dstirq == i) break; } @@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqflag = 0; /* Conforming */ - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_dstapic = dstapic; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_srcbusirq = i; /* Identity mapped */ - mp_irq.mp_dstirq = i; + mp_irq.type = MP_INTSRC; + mp_irq.irqflag = 0; /* Conforming */ + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.dstapic = dstapic; + mp_irq.irqtype = mp_INT; + mp_irq.srcbusirq = i; /* Identity mapped */ + mp_irq.dstirq = i; save_mp_irq(&mp_irq); } @@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity) { #ifdef CONFIG_X86_MPPARSE - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; int ioapic; if (!acpi_ioapic) return 0; /* print the entry should happen on mptable identically */ - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.mp_srcbus = number; - mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); - mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; save_mp_irq(&mp_irq); #endif diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95fa..4abff454c55b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d2d17b8d10f8..e9af14f748ea 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -48,6 +48,7 @@ #include <asm/proto.h> #include <asm/apic.h> #include <asm/i8259.h> +#include <asm/smp.h> #include <mach_apic.h> #include <mach_apicdef.h> @@ -895,6 +896,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* @@ -1126,6 +1131,11 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + if (disable_apic) { + disable_ioapic_setup(); + return; + } + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && esr_disable) { @@ -1567,11 +1577,11 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { -#ifdef CONFIG_X86_64 if (disable_apic) { pr_info("Apic disabled\n"); return -1; } +#ifdef CONFIG_X86_64 if (!cpu_has_apic) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); @@ -1869,17 +1879,8 @@ void __cpuinit generic_processor_info(int apicid, int version) #endif #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edbc..64c834a39aa8 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,13 +49,7 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); - ENTRY(oldrsp); - ENTRY(pcurrent); - ENTRY(irqcount); - ENTRY(cpunumber); - ENTRY(irqstackptr); - ENTRY(data_offset); + DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 667e5d561ed7..95eb30e1e677 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,8 @@ #include <asm/asm.h> #include <asm/numa.h> #include <asm/smp.h> +#include <asm/cpu.h> +#include <asm/cpumask.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> @@ -879,54 +881,34 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ +#else +DEFINE_PER_CPU(char *, irq_stack_ptr) = + per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; +#endif + +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + +DEFINE_PER_CPU(unsigned int, irq_count) = -1; void __cpuinit pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); - /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } + load_pda_offset(cpu); } -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -984,15 +966,18 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) pda_init(cpu); - else - estacks = boot_exception_stacks; + +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif me = current; @@ -1026,18 +1011,13 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 06fcd8f9323c..8f3c95c7e61f 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,9 +150,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +static void do_drv_read(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -167,12 +166,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +static void do_drv_write(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -189,23 +186,30 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + set_cpus_allowed_ptr(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed_ptr(current, &saved_mask); } static void drv_write(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); + set_cpus_allowed_ptr(current, cpumask_of(i)); + do_drv_write(cmd); } + + set_cpus_allowed_ptr(current, &saved_mask); + return; } static u32 get_cur_val(const struct cpumask *mask) @@ -231,15 +235,8 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return 0; - - cpumask_copy(cmd.mask, mask); - drv_read(&cmd); - free_cpumask_var(cmd.mask); - dprintk("get_cur_val = %u\n", cmd.val); return cmd.val; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be78..58527a9fc404 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -132,7 +132,16 @@ struct _cpuid4_info { union _cpuid4_leaf_ecx ecx; unsigned long size; unsigned long can_disable; - cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ + DECLARE_BITMAP(shared_cpu_map, NR_CPUS); +}; + +/* subset of above _cpuid4_info w/o shared_cpu_map */ +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; }; #ifdef CONFIG_PCI @@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; @@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) } static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +__cpuinit cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) return 0; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - + struct _cpuid4_info_regs this_leaf; int retval; - retval = cpuid4_cache_lookup(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { switch(this_leaf.eax.split.level) { case 1: @@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); else { index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); + cpumask_set_cpu(i, + to_cpumask(this_leaf->shared_cpu_map)); if (i != cpu && per_cpu(cpuid4_info, i)) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); + sibling_leaf = + CPUID4_INFO_IDX(i, index); + cpumask_set_cpu(cpu, to_cpumask( + sibling_leaf->shared_cpu_map)); } } } @@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { + for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); + cpumask_clear_cpu(cpu, + to_cpumask(sibling_leaf->shared_cpu_map)); } } #else @@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int n = 0; if (len > 1) { - cpumask_t *mask = &this_leaf->shared_cpu_map; + const struct cpumask *mask; + mask = to_cpumask(this_leaf->shared_cpu_map); n = type? cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); @@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; ssize_t ret = 0; int i; @@ -718,7 +742,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; unsigned int ret, index, val; @@ -863,7 +888,7 @@ err_out: return -ENOMEM; } -static cpumask_t cache_dev_map = CPU_MASK_NONE; +static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) @@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - cpu_set(cpu, cache_dev_map); + cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return 0; @@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (per_cpu(cpuid4_info, cpu) == NULL) return; - if (!cpu_isset(cpu, cache_dev_map)) + if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; - cpu_clear(cpu, cache_dev_map); + cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094d..4772e91e8246 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_t cpus; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(per_cpu(cpu_core_map, cpu)); + i = cpumask_first(&per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } + if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + kfree(b); + err = -ENOMEM; + goto out; + } b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); if (!b->kobj) goto out_free; #ifndef CONFIG_SMP - b->cpus = CPU_MASK_ALL; + cpumask_setall(b->cpus); #else - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; @@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; + free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) free_out: kobject_del(b->kobj); kobject_put(b->kobj); + free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35ab..11b93cabdf78 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,7 +24,7 @@ #include <asm/apic.h> #include <asm/hpet.h> #include <linux/kdebug.h> -#include <asm/smp.h> +#include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d0707048..d35db5993fd6 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irq_stack_end = + (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *) estack_end[-2]; continue; } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); + if (irq_stack_end) { + unsigned long *irq_stack; + irq_stack = irq_stack_end - + (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irqstack && stack < irqstack_end) { + if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; + stack = (unsigned long *) (irq_stack_end[-1]); + irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irq_stack_end = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + unsigned long *irq_stack = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); + if (stack >= irq_stack && stack <= irq_stack_end) { + if (stack == irq_stack_end) { + stack = (unsigned long *) (irq_stack_end[-1]); printk(" <EOI> "); } } else { @@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index d6f0490a7391..46469029e9d3 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1203,7 +1203,6 @@ nmi_stack_correct: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi @@ -1244,7 +1243,6 @@ nmi_espfix_stack: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1954a9662203..c092e7d2686d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -52,6 +52,7 @@ #include <asm/irqflags.h> #include <asm/paravirt.h> #include <asm/ftrace.h> +#include <asm/percpu.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ #include <linux/elf-em.h> @@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -336,15 +337,15 @@ ENTRY(save_args) je 1f SWAPGS /* - * irqcount is used to check if a CPU is already on an interrupt stack + * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl %gs:pda_irqcount +1: incl PER_CPU_VAR(irq_count) jne 2f popq_cfi %rax /* move return address... */ - mov %gs:pda_irqstackptr,%rsp + mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 pushq_cfi %rax /* ... to the new stack */ /* @@ -467,7 +468,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -478,8 +479,8 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq %rsp,PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: @@ -522,7 +523,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -832,11 +833,11 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 @@ -1077,10 +1078,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - movq %gs:pda_data_offset, %rbp - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) @@ -1264,14 +1265,14 @@ ENTRY(call_softirq) CFI_REL_OFFSET rbp,0 mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - incl %gs:pda_irqcount - cmove %gs:pda_irqstackptr,%rsp + incl PER_CPU_VAR(irq_count) + cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) ret CFI_ENDPROC END(call_softirq) @@ -1301,15 +1302,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME -11: incl %gs:pda_irqcount +11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - cmovzq %gs:pda_irqstackptr,%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp CFI_DEF_CFA_REGISTER rsp - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC END(do_hypervisor_callback) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..af67d3227ea6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,27 +26,6 @@ #include <asm/bios_ebda.h> #include <asm/trampoline.h> -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; - -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. - */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - -void __init x86_64_init_pda(void) -{ - _cpu_pda = __cpu_pda; - cpu_pda(0) = &_boot_cpu_pda; - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -112,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..c8ace880661b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,7 @@ #include <asm/msr.h> #include <asm/cache.h> #include <asm/processor-flags.h> +#include <asm/percpu.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -204,6 +205,23 @@ ENTRY(secondary_startup_64) pushq $0 popfq +#ifdef CONFIG_SMP + /* + * early_gdt_base should point to the gdt_page in static percpu init + * data area. Computing this requires two symbols - __per_cpu_load + * and per_cpu__gdt_page. As linker can't do no such relocation, do + * it by hand. As early_gdt_descr is manipulated by C code for + * secondary CPUs, this should be done only once for the boot CPU + * when early_gdt_descr_base contains zero. + */ + movq early_gdt_descr_base(%rip), %rax + testq %rax, %rax + jnz 1f + movq $__per_cpu_load, %rax + addq $per_cpu__gdt_page, %rax + movq %rax, early_gdt_descr_base(%rip) +1: +#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -226,12 +244,18 @@ ENTRY(secondary_startup_64) movl %eax,%fs movl %eax,%gs - /* - * Setup up a dummy PDA. this is just for some early bootup code - * that does in_interrupt() - */ + /* Set up %gs. + * + * On SMP, %gs should point to the per-cpu area. For initial + * boot, make %gs point to the init data section. For a + * secondary CPU,initial_gs should be set to its pda address + * before the CPU runs this code. + * + * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't + * change. + */ movl $MSR_GS_BASE,%ecx - movq $empty_zero_page,%rax + movq initial_gs(%rip),%rax movq %rax,%rdx shrq $32,%rdx wrmsr @@ -257,6 +281,12 @@ ENTRY(secondary_startup_64) .align 8 ENTRY(initial_code) .quad x86_64_start_kernel + ENTRY(initial_gs) +#ifdef CONFIG_SMP + .quad __per_cpu_load +#else + .quad PER_CPU_VAR(__pda) +#endif __FINITDATA ENTRY(stack_start) @@ -401,7 +431,12 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +#ifdef CONFIG_SMP +early_gdt_descr_base: + .quad 0x0000000000000000 +#else + .quad per_cpu__gdt_page +#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536c..f79660390724 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -46,6 +46,7 @@ #include <asm/idle.h> #include <asm/io.h> #include <asm/smp.h> +#include <asm/cpu.h> #include <asm/desc.h> #include <asm/proto.h> #include <asm/acpi.h> @@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -356,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpumask_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -386,7 +387,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -579,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(&desc->affinity, cfg->domain, mask); + cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); + return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) + if (mp_irqs[i].irqtype == type && + (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + mp_irqs[i].dstapic == MP_APIC_ALL) && + mp_irqs[i].dstirq == pin) return i; return -1; @@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].mp_dstirq; + return mp_irqs[i].dstirq; } return -1; } @@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) return apic; } } @@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && + !mp_irqs[i].irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + if (pin == (mp_irqs[i].srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) + switch (mp_irqs[idx].irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + switch ((mp_irqs[idx].irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mp_dstirq != pin) + if (mp_irqs[idx].dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; + irq = mp_irqs[idx].srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); + mp_ioapics[apic].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void) notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); continue; } if (notcon) { @@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mp_apicid; + old_id = mp_ioapics[apic].apicid; - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + mp_ioapics[apic].apicid = reg_00.bits.ID; } /* @@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { + mp_ioapics[apic].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; + mp_ioapics[apic].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mp_apicid) + if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; + if (mp_irqs[i].dstapic == old_id) + mp_irqs[i].dstapic + = mp_ioapics[apic].apicid; /* * Read the right value from the MPC table and @@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + reg_00.bits.ID = mp_ioapics[apic].apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -2383,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (cfg->move_in_progress) send_cleanup_vector(cfg); - cpumask_copy(&desc->affinity, mask); + cpumask_copy(desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2405,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2434,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2448,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } @@ -2516,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp) /* domain has not changed, but affinity did */ me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { + if (cpumask_test_cpu(me, desc->affinity)) { *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; @@ -3117,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -3183,7 +3182,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; @@ -3258,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms int err; unsigned dest; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) @@ -3726,6 +3728,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct irq_cfg *cfg; int err; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { @@ -3850,6 +3855,22 @@ void __init probe_nr_irqs_gsi(void) nr_irqs_gsi = nr; } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3984,8 +4005,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) + if (mp_irqs[i].irqtype == mp_INT && + mp_irqs[i].srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; @@ -4039,7 +4060,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; @@ -4100,7 +4121,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; + ioapic_phys = mp_ioapics[i].apicaddr; #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 22f650db917f..a6bca1d33a8a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e9..e0f29be8ab0b 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -248,7 +248,7 @@ void fixup_irqs(void) if (irq == 2) continue; - affinity = &desc->affinity; + affinity = desc->affinity; if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); affinity = cpu_all_mask; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec025..1db05247b47f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include <asm/io_apic.h> #include <asm/idle.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * @@ -100,7 +103,7 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; + affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e615..5e9f4fc51385 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -87,9 +87,9 @@ #include <linux/cpu.h> #include <linux/firmware.h> #include <linux/platform_device.h> +#include <linux/uaccess.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/processor.h> #include <asm/microcode.h> @@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; } -static inline int +static inline int update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 0 : 1; @@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } - ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, - &get_ucode_fw); + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); release_firmware(firmware); @@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb1..0edd819050e7 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } /* We don't need anything special. */ @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".text", secstrings + s->sh_name)) text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b1..c23880b90b5c 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -30,14 +30,14 @@ #include <asm/page.h> #include <asm/pgtable.h> -#define DEBUGP(fmt...) +#define DEBUGP(fmt...) #ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } void *module_alloc(unsigned long size) @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; - u64 val; + u64 val; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); - val = sym->st_value + rel[i].r_addend; + val = sym->st_value + rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if ((s64)val != *(s32 *)loc) goto overflow; break; - case R_X86_64_PC32: + case R_X86_64_PC32: val -= (u64)loc; *(u32 *)loc = val; #if 0 if ((s64)val != *(s32 *)loc) - goto overflow; + goto overflow; #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", + printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + printk(KERN_ERR "overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", me->name); @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk("non add relocation not supported\n"); + printk(KERN_ERR "non add relocation not supported\n"); return -ENOSYS; -} +} int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL; @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a1..fa6bb263892e 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,6 +27,7 @@ #include <asm/e820.h> #include <asm/trampoline.h> #include <asm/setup.h> +#include <asm/smp.h> #include <mach_apic.h> #ifdef CONFIG_X86_32 @@ -143,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) if (bad_ioapic(m->apicaddr)) return; - mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].mp_apicid = m->apicid; - mp_ioapics[nr_ioapics].mp_type = m->type; - mp_ioapics[nr_ioapics].mp_apicver = m->apicver; - mp_ioapics[nr_ioapics].mp_flags = m->flags; + mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; + mp_ioapics[nr_ioapics].apicid = m->apicid; + mp_ioapics[nr_ioapics].type = m->type; + mp_ioapics[nr_ioapics].apicver = m->apicver; + mp_ioapics[nr_ioapics].flags = m->flags; nr_ioapics++; } @@ -159,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) m->srcbusirq, m->dstapic, m->dstirq); } -static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, - (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, - mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + mp_irq->irqtype, mp_irq->irqflag & 3, + (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, + mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mp_config_intsrc *mp_irq) + struct mpc_intsrc *mp_irq) { - mp_irq->mp_dstapic = m->dstapic; - mp_irq->mp_type = m->type; - mp_irq->mp_irqtype = m->irqtype; - mp_irq->mp_irqflag = m->irqflag; - mp_irq->mp_srcbus = m->srcbus; - mp_irq->mp_srcbusirq = m->srcbusirq; - mp_irq->mp_dstirq = m->dstirq; + mp_irq->dstapic = m->dstapic; + mp_irq->type = m->type; + mp_irq->irqtype = m->irqtype; + mp_irq->irqflag = m->irqflag; + mp_irq->srcbus = m->srcbus; + mp_irq->srcbusirq = m->srcbusirq; + mp_irq->dstirq = m->dstirq; } -static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - m->dstapic = mp_irq->mp_dstapic; - m->type = mp_irq->mp_type; - m->irqtype = mp_irq->mp_irqtype; - m->irqflag = mp_irq->mp_irqflag; - m->srcbus = mp_irq->mp_srcbus; - m->srcbusirq = mp_irq->mp_srcbusirq; - m->dstirq = mp_irq->mp_dstirq; + m->dstapic = mp_irq->dstapic; + m->type = mp_irq->type; + m->irqtype = mp_irq->irqtype; + m->irqflag = mp_irq->irqflag; + m->srcbus = mp_irq->srcbus; + m->srcbusirq = mp_irq->srcbusirq; + m->dstirq = mp_irq->dstirq; } -static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - if (mp_irq->mp_dstapic != m->dstapic) + if (mp_irq->dstapic != m->dstapic) return 1; - if (mp_irq->mp_type != m->type) + if (mp_irq->type != m->type) return 2; - if (mp_irq->mp_irqtype != m->irqtype) + if (mp_irq->irqtype != m->irqtype) return 3; - if (mp_irq->mp_irqflag != m->irqflag) + if (mp_irq->irqflag != m->irqflag) return 4; - if (mp_irq->mp_srcbus != m->srcbus) + if (mp_irq->srcbus != m->srcbus) return 5; - if (mp_irq->mp_srcbusirq != m->srcbusirq) + if (mp_irq->srcbusirq != m->srcbusirq) return 6; - if (mp_irq->mp_dstirq != m->dstirq) + if (mp_irq->dstirq != m->dstirq) return 7; return 0; @@ -416,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.type = MP_INTSRC; intsrc.irqflag = 0; /* conforming */ intsrc.srcbus = 0; - intsrc.dstapic = mp_ioapics[0].mp_apicid; + intsrc.dstapic = mp_ioapics[0].apicid; intsrc.irqtype = mp_INT; @@ -569,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct intel_mp_floating *mpf_found; +static struct mpf_intel *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { - struct intel_mp_floating *mpf = mpf_found; + struct mpf_intel *mpf = mpf_found; if (!mpf) return; @@ -597,9 +598,9 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->mpf_feature2 & (1 << 7)) { + if (mpf->feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -610,7 +611,7 @@ static void __init __get_smp_config(unsigned int early) /* * Now see if we need to read further. */ - if (mpf->mpf_feature1 != 0) { + if (mpf->feature1 != 0) { if (early) { /* * local APIC has default address @@ -620,16 +621,16 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); + mpf->feature1); + construct_default_ISA_mptable(mpf->feature1); - } else if (mpf->mpf_physptr) { + } else if (mpf->physptr) { /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -687,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { - mpf = (struct intel_mp_floating *)bp; + mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && + (mpf->length == 1) && !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { + ((mpf->specification == 1) + || (mpf->specification == 4))) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif @@ -712,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 1; reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { + if (mpf->physptr) { unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* @@ -721,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * the bottom is mapped now. * PC-9800's MPC table places on the very last * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; + if (mpf->physptr + size > end) + size = end - mpf->physptr; #endif - reserve_bootmem_generic(mpf->mpf_physptr, size, + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); } @@ -808,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) /* not legacy */ for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; - if (mp_irqs[i].mp_srcbus != m->srcbus) + if (mp_irqs[i].srcbus != m->srcbus) continue; - if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + if (mp_irqs[i].srcbusirq != m->srcbusirq) continue; if (irq_used[i]) { /* already claimed */ @@ -921,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (irq_used[i]) continue; - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; if (nr_m_spare > 0) { @@ -1000,7 +1001,7 @@ static int __init update_mp_table(void) { char str[16]; char oem[10]; - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; struct mpc_table *mpc, *mpc_new; if (!enable_update_mptable) @@ -1013,19 +1014,19 @@ static int __init update_mp_table(void) /* * Now see if we need to go further. */ - if (mpf->mpf_feature1 != 0) + if (mpf->feature1 != 0) return 0; - if (!mpf->mpf_physptr) + if (!mpf->physptr) return 0; - mpc = phys_to_virt(mpf->mpf_physptr); + mpc = phys_to_virt(mpf->physptr); if (!smp_check_mpc(mpc, oem, str)) return 0; printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); - printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; @@ -1046,23 +1047,23 @@ static int __init update_mp_table(void) } printk(KERN_INFO "use in-positon replacing\n"); } else { - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); memcpy(mpc_new, mpc, mpc->length); mpc = mpc_new; /* check if we can modify that */ - if (mpc_new_phys - mpf->mpf_physptr) { - struct intel_mp_floating *mpf_new; + if (mpc_new_phys - mpf->physptr) { + struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; } - mpf->mpf_checksum = 0; - mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + mpf->checksum = 0; + mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "physptr new: %x\n", mpf->physptr); } /* diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2c..3cf3413ec626 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -35,10 +35,10 @@ #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/system.h> static struct class *msr_class; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7f..23b6d9e6e4f5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b4..2c00a57ccb90 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * Return saved PC of a blocked thread. */ @@ -591,7 +588,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4f..4523ff88a69d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -57,6 +57,12 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + +DEFINE_PER_CPU(unsigned long, old_rsp); +static DEFINE_PER_CPU(unsigned char, is_idle); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { - write_pda(isidle, 1); + percpu_write(is_idle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - if (test_and_clear_bit_pda(0, isidle) == 0) + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } @@ -392,7 +398,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -613,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); + percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643b..f8536fee5c12 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -14,6 +14,7 @@ #include <asm/reboot.h> #include <asm/pci_x86.h> #include <asm/virtext.h> +#include <asm/cpu.h> #ifdef CONFIG_X86_32 # include <linux/dmi.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf69..f41c4486c270 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,7 +89,7 @@ #include <asm/system.h> #include <asm/vsyscall.h> -#include <asm/smp.h> +#include <asm/cpu.h> #include <asm/desc.h> #include <asm/dma.h> #include <asm/iommu.h> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba0..efbafbbff584 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,23 @@ #include <asm/mpspec.h> #include <asm/apicdef.h> #include <asm/highmem.h> +#include <asm/proto.h> +#include <asm/cpumask.h> + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + +/* + * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but + * voyager wants cpu_number too. + */ +#ifdef CONFIG_SMP +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; @@ -26,31 +43,84 @@ unsigned int max_physical_apicid; physid_mask_t phys_cpu_present_map; #endif -/* map cpu index to physical APIC ID */ +/* + * Map cpu index to physical APIC ID + */ DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 +#define X86_64_NUMA 1 /* (used later) */ +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); -/* map cpu index to node index */ +/* + * Map cpu index to node index + */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -/* which logical CPUs are on which nodes */ +/* + * Which logical CPUs are on which nodes + */ cpumask_t *node_to_cpumask_map; EXPORT_SYMBOL(node_to_cpumask_map); -/* setup node_to_cpumask_map */ +/* + * Setup node_to_cpumask_map + */ static void __init setup_node_to_cpumask_map(void); #else static inline void setup_node_to_cpumask_map(void) { } #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +/* + * Define load_pda_offset() and per-cpu __pda for x86_64. + * load_pda_offset() is responsible for loading the offset of pda into + * %gs. + * + * On SMP, pda offset also duals as percpu base address and thus it + * should be at the start of per-cpu area. To achieve this, it's + * preallocated in vmlinux_64.lds.S directly instead of using + * DEFINE_PER_CPU(). + */ +#ifdef CONFIG_X86_64 +void __cpuinit load_pda_offset(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); + mb(); +} +#ifndef CONFIG_SMP +DEFINE_PER_CPU(struct x8664_pda, __pda); +#endif +EXPORT_PER_CPU_SYMBOL(__pda); +#endif /* CONFIG_SMP && CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 + +/* correctly size the local cpu masks */ +static void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + +#else /* CONFIG_X86_32 */ + +static inline void setup_cpu_local_masks(void) +{ +} + +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the @@ -79,78 +149,14 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ +#ifdef CONFIG_X86_64 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { + [0] = (unsigned long)__per_cpu_load, +}; +#else unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +#endif EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - struct x8664_pda **new_cpu_pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long tsize = nr_cpu_ids * sizeof(void *); - unsigned long asize = size * (nr_cpu_ids - 1); - - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); - continue; - } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; - pda += size; - } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ /* * Great future plan: @@ -164,9 +170,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -197,8 +200,25 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif + + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); + per_cpu(cpu_number, cpu) = cpu; +#ifdef CONFIG_X86_64 + per_cpu(irq_stack_ptr, cpu) = + (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; + /* + * CPU0 modified pda in the init data area, reload pda + * offset for CPU0 and clear the area for others. + */ + if (cpu == 0) + load_pda_offset(0); + else + memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); +#endif + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* Setup percpu data maps */ @@ -220,6 +240,7 @@ void __init setup_per_cpu_areas(void) * Requires node_possible_map to be valid. * * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ static void __init setup_node_to_cpumask_map(void) { @@ -235,6 +256,7 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); pr_debug("Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); @@ -247,17 +269,23 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; - - if (cpu_to_node_map) + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; + return; + } - else if (per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; - else - pr_debug("Setting node for non-present cpu %d\n", cpu); + if (node != NUMA_NO_NODE) + per_cpu(node_number, cpu) = node; } void __cpuinit numa_clear_node(int cpu) @@ -274,7 +302,7 @@ void __cpuinit numa_add_cpu(int cpu) void __cpuinit numa_remove_cpu(int cpu) { - cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } #else /* CONFIG_DEBUG_PER_CPU_MAPS */ @@ -284,7 +312,7 @@ void __cpuinit numa_remove_cpu(int cpu) */ static void __cpuinit numa_set_cpumask(int cpu, int enable) { - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); cpumask_t *mask; char buf[64]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87f..869b98840fd0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,7 +53,6 @@ #include <asm/nmi.h> #include <asm/irq.h> #include <asm/idle.h> -#include <asm/smp.h> #include <asm/trampoline.h> #include <asm/cpu.h> #include <asm/numa.h> @@ -745,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -808,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -847,14 +790,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; +#ifdef CONFIG_X86_32 init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; @@ -931,9 +877,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ @@ -1125,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); + disable_ioapic_setup(); return -1; } diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 397e309839dd..add36b4e37c9 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -3,11 +3,16 @@ */ #include <linux/module.h> #include <asm/smp.h> +#include <asm/sections.h> -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_64 +DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; +#else DEFINE_PER_CPU(unsigned long, this_cpu_off); +#endif EXPORT_PER_CPU_SYMBOL(this_cpu_off); +#ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself * (still using the master per-cpu area), or a CPU doing it for a @@ -23,8 +28,5 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; - per_cpu(cpu_number, cpu) = cpu; } #endif diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ce5054642247..abf0808d6fc4 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include <asm/tlbflush.h> -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include <mach_ipi.h> @@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) * Optimizations Manfred Spraul <manfred@colorfullife.com> */ -static cpumask_t flush_cpumask; +static cpumask_var_t flush_cpumask; static struct mm_struct *flush_mm; static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); @@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); */ void leave_mm(int cpu) { - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); + BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) cpu = get_cpu(); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { + if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } ack_APIC_irq(); smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); + cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); out: put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - cpumask_t cpumask = *cpumaskp; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask * - mask must exist :) */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, */ spin_lock(&tlbstate_lock); + cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); +#ifdef CONFIG_HOTPLUG_CPU + /* If a CPU which we ran on has gone down, OK. */ + cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + if (unlikely(cpumask_empty(flush_cpumask))) { + spin_unlock(&tlbstate_lock); + return; + } +#endif flush_mm = mm; flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); /* * Make the above memory operations globally visible before @@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask)) + while (!cpumask_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ cpu_relax(); @@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page); @@ -237,7 +222,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } @@ -246,11 +231,9 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) +static int init_flush_cpumask(void) { - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; + alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); + return 0; } - +early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..e64a32c48825 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include <asm/uv/uv_hub.h> #include <asm/uv/uv_bau.h> +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include <mach_ipi.h> /* * Smarter SMP flushing macros. @@ -43,10 +46,10 @@ union smp_flush_state { struct { - cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -131,7 +134,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, f->flush_cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -153,19 +156,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) - return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; @@ -180,7 +179,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_mm = mm; f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); /* * Make the above memory operations globally visible before @@ -191,9 +191,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask)) + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; @@ -201,6 +202,25 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + /* FIXME: could be an percpu_alloc'd thing */ + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); + + cpumask_andnot(after_uv_flush, cpumask, + cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + + put_cpu_var(flush_tlb_uv_cpumask); + return; + } + flush_tlb_others_ipi(cpumask, mm, va); +} + static int __cpuinit init_smp_flush(void) { int i; @@ -215,25 +235,18 @@ core_initcall(init_smp_flush); void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -241,8 +254,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -250,11 +263,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -263,8 +273,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } @@ -274,7 +284,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e0..690dcf1a27d4 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * The cpumaskp mask contains the cpus the broadcast was sent to. * * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask is left - * unchanged. + * Returns 0 if some remote flushing remains to be done. The mask will have + * some bits still set. */ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - cpumask_t *cpumaskp) + struct cpumask *cpumaskp) { int completion_status = 0; int right_shift; @@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpu_clear(bit, *cpumaskp); + cpumask_clear_cpu(bit, cpumaskp); } - if (!cpus_empty(*cpumaskp)) + if (!cpumask_empty(cpumaskp)) return 0; return 1; } @@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Returns 1 if all remote flushing was done. * Returns 0 if some remote flushing remains to be done. */ -int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, +int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, unsigned long va) { int i; @@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde7..3eba7f7bac05 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -178,14 +178,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* freed after init ends here */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..a09abb8fb97f 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include <asm-generic/vmlinux.lds.h> +#include <asm/asm-offsets.h> #include <asm/page.h> #undef i386 /* in case the preprocessor is a 32bit one */ @@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +211,29 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * switch it back to data.init. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) + per_cpu____pda = __per_cpu_start; +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa354..3909e3ba5ce3 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); - -EXPORT_SYMBOL(_proxy_pda); diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index a580b9562e76..0ade62555ff3 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -9,6 +9,7 @@ #include <asm/e820.h> #include <asm/io.h> #include <asm/setup.h> +#include <asm/cpu.h> void __init pre_intr_init_hook(void) { diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9840b7ec749a..96f15b09a4c5 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -402,7 +402,7 @@ void __init find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); + percpu_write(cpu_number, boot_cpu_id); } /* @@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu) stack_start.sp = (void *)idle->thread.sp; init_gdt(cpu); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); @@ -1780,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); + percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9e268b6b204e..90dfae511a41 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -534,7 +534,7 @@ static int vmalloc_fault(unsigned long address) happen within a race in page table update. In the later case just flush. */ - pgd = pgd_offset(current->mm ?: &init_mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 88f1b10de3be..4a6989e47a53 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> -#include <asm/smp.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e89d24815f26..4cf30dee8161 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -555,10 +555,12 @@ repeat: if (!pte_val(old_pte)) { if (!primary) return 0; - WARN(1, KERN_WARNING "CPA: called for zero pte. " - "vaddr = %lx cpa->vaddr = %lx\n", address, - *cpa->vaddr); - return -EINVAL; + + /* + * Special error value returned, indicating that the mapping + * did not exist at this address. + */ + return -EFAULT; } if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 85cbd3cd3723..3be399013de6 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -333,11 +333,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, req_type & _PAGE_CACHE_MASK); } - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, new_type); - else if (is_range_ram < 0) - return -EINVAL; + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; + } spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -505,6 +522,35 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif /* CONFIG_STRICT_DEVMEM */ +/* + * Change the memory type for the physial address range in kernel identity + * mapping space if that range is a part of identity map. + */ +static int kernel_map_sync_memtype(u64 base, unsigned long size, + unsigned long flags) +{ + unsigned long id_sz; + int ret; + + if (!pat_enabled || base >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < base + size) ? + __pa(high_memory) - base : + size; + + ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags); + /* + * -EFAULT return means that the addr was not valid and did not have + * any identity mapping. That case is a success for + * kernel_map_sync_memtype. + */ + if (ret == -EFAULT) + ret = 0; + + return ret; +} + int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { @@ -555,9 +601,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (retval < 0) return 0; - if (((pfn < max_low_pfn_mapped) || - (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + if (kernel_map_sync_memtype(offset, size, flags)) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -601,12 +645,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) * Reserved non RAM regions only and after successful reserve_memtype, * this func also keeps identity mapping (if any) in sync with this new prot. */ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, + int strict_prot) { int is_ram = 0; - int id_sz, ret; + int ret; unsigned long flags; - unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); is_ram = pagerange_is_ram(paddr, paddr + size); @@ -625,26 +670,27 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) return ret; if (flags != want_flags) { - free_memtype(paddr, paddr + size); - printk(KERN_ERR - "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size), - cattr_name(flags)); - return -EINVAL; + if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { + free_memtype(paddr, paddr + size); + printk(KERN_ERR "%s:%d map pfn expected mapping type %s" + " for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + return -EINVAL; + } + /* + * We allow returning different type than the one requested in + * non strict case. + */ + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); } - /* Need to keep identity mapping in sync */ - if (paddr >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < paddr + size) ? - __pa(high_memory) - paddr : - size; - - if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { + if (kernel_map_sync_memtype(paddr, size, flags)) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d reserve_pfn_range ioremap_change_attr failed %s " @@ -689,6 +735,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; + pgprot_t pgprot; if (!pat_enabled) return 0; @@ -702,7 +749,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) WARN_ON_ONCE(1); return -EINVAL; } - return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); + pgprot = __pgprot(prot); + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } /* reserve entire vma page by page, using pfn and prot from pte */ @@ -710,7 +758,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); + pgprot = __pgprot(prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); if (retval) goto cleanup_ret; } @@ -741,7 +790,7 @@ cleanup_ret: * Note that this function can be called with caller trying to map only a * subrange/page inside the vma. */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { int retval = 0; @@ -758,14 +807,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot); + return reserve_pfn_range(paddr, vma_size, prot, 0); } /* reserve page by page using pfn and size */ base_paddr = (resource_size_t)pfn << PAGE_SHIFT; for (i = 0; i < size; i += PAGE_SIZE) { paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); if (retval) goto cleanup_ret; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index f884740da318..5ead808dd70c 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -314,17 +314,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return retval; if (flags != new_flags) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uncached, return cannot be write-back - * - request is uncached, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((flags == _PAGE_CACHE_UC_MINUS && - (new_flags == _PAGE_CACHE_WB)) || - (flags == _PAGE_CACHE_WC && - new_flags == _PAGE_CACHE_WB)) { + if (!is_new_memtype_allowed(flags, new_flags)) { free_memtype(addr, addr+len); return -EINVAL; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b20..75b94139e1f2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr) preempt_enable(); } -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) { struct { struct mmuext_op op; - cpumask_t mask; + DECLARE_BITMAP(mask, NR_CPUS); } *args; - cpumask_t cpumask = *cpus; struct multicall_space mcs; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpus)); BUG_ON(!mm); - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); +issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } @@ -702,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0) static void xen_write_cr2(unsigned long cr2) { - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; + percpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return x86_read_percpu(xen_vcpu)->arch.cr2; + return percpu_read(xen_vcpu)->arch.cr2; } static unsigned long xen_read_cr2_direct(void) { - return x86_read_percpu(xen_vcpu_info.arch.cr2); + return percpu_read(xen_vcpu_info.arch.cr2); } static void xen_write_cr4(unsigned long cr4) @@ -725,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4) static unsigned long xen_read_cr3(void) { - return x86_read_percpu(xen_cr3); + return percpu_read(xen_cr3); } static void set_current_cr3(void *v) { - x86_write_percpu(xen_current_cr3, (unsigned long)v); + percpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -755,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); if (kernel) { - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -771,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); @@ -1652,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - x86_64_init_pda(); + pda_init(0); #endif xen_smp_init(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index bb042608c602..2e8271431e1a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -83,7 +83,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } @@ -96,7 +96,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240e26c7..98cb9869eb24 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1063,18 +1063,14 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = percpu_read(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { load_cr3(swapper_pg_dir); arch_flush_lazy_cpu_mode(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 858938241616..e786fa7f2615 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); + local_irq_restore(percpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e2069c7c7..72c2eb9b64cd 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); cpu_set(cpu, cpu_online_map); - x86_write_percpu(cpu_state, CPU_ONLINE); + percpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ @@ -283,22 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - + per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 init_gdt(cpu); - per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); @@ -445,11 +430,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -459,11 +440,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 05794c566e87..d6fc51f4ce85 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -17,6 +17,7 @@ #include <asm/processor-flags.h> #include <asm/errno.h> #include <asm/segment.h> +#include <asm/percpu.h> #include <xen/interface/xen.h> @@ -28,12 +29,10 @@ #if 1 /* - x86-64 does not yet support direct access to percpu variables - via a segment override, so we just need to make sure this code - never gets used + FIXME: x86_64 now can support direct access to percpu variables + via a segment override. Update xen accordingly. */ #define BUG ud2a -#define PER_CPU_VAR(var, off) 0xdeadbeef #endif /* @@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct) BUG /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events @@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct) ENTRY(xen_irq_disable_direct) BUG - movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ENDPATCH(xen_irq_disable_direct) ret ENDPROC(xen_irq_disable_direct) @@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct) ENTRY(xen_save_fl_direct) BUG - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah,%ah ENDPATCH(xen_save_fl_direct) @@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct) BUG testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events 1: @@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq %rsp, PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER_CS pushq %rcx @@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack, %rsp + movq %rsp, PER_CPU_VAR(old_rsp) + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER32_CS pushq %rcx |