diff options
author | Linus Torvalds | 2013-11-11 18:19:06 +0900 |
---|---|---|
committer | Linus Torvalds | 2013-11-11 18:19:06 +0900 |
commit | edae583a6d4d1ad2eb73981787790993fef1bbad (patch) | |
tree | c1680187bad65be2e1c4d72b25642d42d2efd394 /arch | |
parent | 0a759b2466d19c619257fec36f988f93424cf3ae (diff) | |
parent | 737d5b980be82f722153d8104f7949e4204c5911 (diff) |
Merge tag 'arc-v3.13-rc1-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
Pull ARC changes from Vineet Gupta:
- Towards a working SMP setup (ASID allocation, TLB Flush,...)
- Support for TRACE_IRQFLAGS, LOCKDEP
- cacheflush backend consolidation for I/D
- Lots of allmodconfig fixlets from Chen
- Other improvements/fixes
* tag 'arc-v3.13-rc1-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (25 commits)
ARC: [plat-arcfpga] defconfig update
smp, ARC: kill SMP single function call interrupt
ARC: [SMP] Disallow RTSC
ARC: [SMP] Fix build failures for large NR_CPUS
ARC: [SMP] enlarge possible NR_CPUS
ARC: [SMP] TLB flush
ARC: [SMP] ASID allocation
arc: export symbol for pm_power_off in reset.c
arc: export symbol for save_stack_trace() in stacktrace.c
arc: remove '__init' for get_hw_config_num_irq()
arc: remove '__init' for first_lines_of_secondary()
arc: remove '__init' for setup_processor() and arc_init_IRQ()
arc: kgdb: add default implementation for kgdb_roundup_cpus()
ARC: Fix bogus gcc warning and micro-optimise TLB iteration loop
ARC: Add support for irqflags tracing and lockdep
ARC: Reset the value of Interrupt Priority Register
ARC: Reduce #ifdef'ery for unaligned access emulation
ARC: Change calling convention of do_page_fault()
ARC: cacheflush optim - PTAG can be loop invariant if V-P is const
ARC: cacheflush refactor #3: Unify the {d,i}cache flush leaf helpers
...
Diffstat (limited to 'arch')
28 files changed, 326 insertions, 181 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 91dbb2757afd..5ede5460c806 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -35,6 +35,12 @@ config ARC select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + config SCHED_OMIT_FRAME_POINTER def_bool y @@ -130,17 +136,14 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_HAS_COH_RTSC - def_bool n - config ARC_HAS_REENTRANT_IRQ_LV2 def_bool n endif config NR_CPUS - int "Maximum number of CPUs (2-32)" - range 2 32 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP default "2" @@ -326,8 +329,7 @@ config ARC_HAS_RTSC bool "Insn: RTSC (64-bit r/o cycle counter)" default y depends on ARC_CPU_REL_4_10 - # if SMP, enable RTSC only if counter is coherent across cores - depends on !SMP || ARC_HAS_COH_RTSC + depends on !SMP endmenu # "ARC CPU Configuration" diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig index 4ca50f1f8d05..e283aa586934 100644 --- a/arch/arc/configs/fpga_defconfig +++ b/arch/arc/configs/fpga_defconfig @@ -2,6 +2,8 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -62,4 +64,5 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_DEBUG_PREEMPT is not set CONFIG_XZ_DEC=y diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e4abdaac6f9f..2fd3162ec4df 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -17,13 +17,7 @@ #endif #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - -/* For a rare case where customers have differently config I/D */ -#define ARC_ICACHE_LINE_LEN L1_CACHE_BYTES -#define ARC_DCACHE_LINE_LEN L1_CACHE_BYTES - -#define ICACHE_LINE_MASK (~(ARC_ICACHE_LINE_LEN - 1)) -#define DCACHE_LINE_MASK (~(ARC_DCACHE_LINE_LEN - 1)) +#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) /* * ARC700 doesn't cache any access in top 256M. diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index c0a72105ee0b..291a70db68b8 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -18,8 +18,8 @@ #include <asm-generic/irq.h> -extern void __init arc_init_IRQ(void); -extern int __init get_hw_config_num_irq(void); +extern void arc_init_IRQ(void); +extern int get_hw_config_num_irq(void); void arc_local_timer_setup(unsigned int cpu); diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index b68b53f458d1..cb7efc29f16f 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -151,16 +151,38 @@ static inline void arch_unmask_irq(unsigned int irq) #else +#ifdef CONFIG_TRACE_IRQFLAGS + +.macro TRACE_ASM_IRQ_DISABLE + bl trace_hardirqs_off +.endm + +.macro TRACE_ASM_IRQ_ENABLE + bl trace_hardirqs_on +.endm + +#else + +.macro TRACE_ASM_IRQ_DISABLE +.endm + +.macro TRACE_ASM_IRQ_ENABLE +.endm + +#endif + .macro IRQ_DISABLE scratch lr \scratch, [status32] bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch + TRACE_ASM_IRQ_DISABLE .endm .macro IRQ_ENABLE scratch lr \scratch, [status32] or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch + TRACE_ASM_IRQ_ENABLE .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index c2663b32866b..8c84ae98c337 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -48,7 +48,7 @@ #ifndef __ASSEMBLY__ typedef struct { - unsigned long asid; /* 8 bit MMU PID + Generation cycle */ + unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; #ifdef CONFIG_ARC_DBG_TLB_PARANOIA diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 43a1b51bb8cc..1fd467ef658f 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -30,13 +30,13 @@ * "Fast Context Switch" i.e. no TLB flush on ctxt-switch * * Linux assigns each task a unique ASID. A simple round-robin allocation - * of H/w ASID is done using software tracker @asid_cache. + * of H/w ASID is done using software tracker @asid_cpu. * When it reaches max 255, the allocation cycle starts afresh by flushing * the entire TLB and wrapping ASID back to zero. * * A new allocation cycle, post rollover, could potentially reassign an ASID * to a different task. Thus the rule is to refresh the ASID in a new cycle. - * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits + * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits * serve as cycle/generation indicator and natural 32 bit unsigned math * automagically increments the generation when lower 8 bits rollover. */ @@ -47,9 +47,11 @@ #define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASID_MASK + 1) #define MM_CTXT_NO_ASID 0UL -#define hw_pid(mm) (mm->context.asid & MM_CTXT_ASID_MASK) +#define asid_mm(mm, cpu) mm->context.asid[cpu] +#define hw_pid(mm, cpu) (asid_mm(mm, cpu) & MM_CTXT_ASID_MASK) -extern unsigned int asid_cache; +DECLARE_PER_CPU(unsigned int, asid_cache); +#define asid_cpu(cpu) per_cpu(asid_cache, cpu) /* * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle) @@ -57,6 +59,7 @@ extern unsigned int asid_cache; */ static inline void get_new_mmu_context(struct mm_struct *mm) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); @@ -71,28 +74,28 @@ static inline void get_new_mmu_context(struct mm_struct *mm) * first need to destroy the context, setting it to invalid * value. */ - if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK)) + if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK)) goto set_hw; /* move to new ASID and handle rollover */ - if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) { + if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) { - flush_tlb_all(); + local_flush_tlb_all(); /* * Above checke for rollover of 8 bit ASID in 32 bit container. * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ - if (!asid_cache) - asid_cache = MM_CTXT_FIRST_CYCLE; + if (!asid_cpu(cpu)) + asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE; } /* Assign new ASID to tsk */ - mm->context.asid = asid_cache; + asid_mm(mm, cpu) = asid_cpu(cpu); set_hw: - write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE); + write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); local_irq_restore(flags); } @@ -104,16 +107,45 @@ set_hw: static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - mm->context.asid = MM_CTXT_NO_ASID; + int i; + + for_each_possible_cpu(i) + asid_mm(mm, i) = MM_CTXT_NO_ASID; + return 0; } +static inline void destroy_context(struct mm_struct *mm) +{ + unsigned long flags; + + /* Needed to elide CONFIG_DEBUG_PREEMPT warning */ + local_irq_save(flags); + asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID; + local_irq_restore(flags); +} + /* Prepare the MMU for task: setup PID reg with allocated ASID If task doesn't have an ASID (never alloc or stolen, get a new ASID) */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + const int cpu = smp_processor_id(); + + /* + * Note that the mm_cpumask is "aggregating" only, we don't clear it + * for the switched-out task, unlike some other arches. + * It is used to enlist cpus for sending TLB flush IPIs and not sending + * it to CPUs where a task once ran-on, could cause stale TLB entry + * re-use, specially for a multi-threaded task. + * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps. + * For a non-aggregating mm_cpumask, IPI not sent C1, and if T1 + * were to re-migrate to C1, it could access the unmapped region + * via any existing stale TLB entries. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + #ifndef CONFIG_SMP /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); @@ -131,11 +163,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ #define activate_mm(prev, next) switch_mm(prev, next, NULL) -static inline void destroy_context(struct mm_struct *mm) -{ - mm->context.asid = MM_CTXT_NO_ASID; -} - /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping * for retiring-mm. However destroy_context( ) still needs to do that because * between mm_release( ) = >deactive_mm( ) and diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 229e50681497..e10f8cef56a8 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -31,7 +31,7 @@ struct cpuinfo_data { extern int root_mountflags, end_mem; extern int running_on_hw; -void __init setup_processor(void); +void setup_processor(void); void __init setup_arch_memory(void); #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index c4fb211dcd25..eefc29f08cdb 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -30,7 +30,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); * APIs provided by arch SMP code to rest of arch code */ extern void __init smp_init_cpus(void); -extern void __init first_lines_of_secondary(void); +extern void first_lines_of_secondary(void); extern const char *arc_platform_smp_cpuinfo(void); /* diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h index b2f9bc7f68c8..71c7b2e4b874 100644 --- a/arch/arc/include/asm/tlbflush.h +++ b/arch/arc/include/asm/tlbflush.h @@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); -/* XXX: Revisit for SMP */ +#ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) #define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) - +#else +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +#endif /* CONFIG_SMP */ #endif diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h index 60702f3751d2..3e5f071bc00c 100644 --- a/arch/arc/include/asm/unaligned.h +++ b/arch/arc/include/asm/unaligned.h @@ -22,7 +22,8 @@ static inline int misaligned_fixup(unsigned long address, struct pt_regs *regs, struct callee_regs *cregs) { - return 0; + /* Not fixed */ + return 1; } #endif diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 34410eb1a308..c14a5bea0c76 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -17,6 +17,8 @@ #include <asm/asm-offsets.h> #include <linux/sched.h> +#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) + struct task_struct *__sched __switch_to(struct task_struct *prev_task, struct task_struct *next_task) { @@ -45,7 +47,16 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) #endif /* set ksp of outgoing task in tsk->thread.ksp */ +#if KSP_WORD_OFF <= 255 "st.as sp, [%3, %1] \n\t" +#else + /* + * Workaround for NR_CPUS=4k + * %1 is bigger than 255 (S9 offset for st.as) + */ + "add2 r24, %3, %1 \n\t" + "st sp, [r24] \n\t" +#endif "sync \n\t" @@ -97,7 +108,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) /* FP/BLINK restore generated by gcc (standard func epilogue */ : "=r"(tmp) - : "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev) + : "n"(KSP_WORD_OFF), "r"(next), "r"(prev) : "blink" ); diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index d8972345e4c2..65690e7fcc8c 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -14,6 +14,8 @@ #include <asm/asm-offsets.h> #include <asm/linkage.h> +#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) + ;################### Low Level Context Switch ########################## .section .sched.text,"ax",@progbits @@ -28,8 +30,13 @@ __switch_to: SAVE_CALLEE_SAVED_KERNEL /* Save the now KSP in task->thread.ksp */ - st.as sp, [r0, (TASK_THREAD + THREAD_KSP)/4] - +#if KSP_WORD_OFF <= 255 + st.as sp, [r0, KSP_WORD_OFF] +#else + /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */ + add2 r24, r0, KSP_WORD_OFF + st sp, [r24] +#endif /* * Return last task in r0 (return reg) * On ARC, Return reg = First Arg reg = r0. diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index b908dde8a331..47d09d07f093 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -250,6 +250,14 @@ ARC_ENTRY handle_interrupt_level1 lr r0, [icause1] and r0, r0, 0x1f +#ifdef CONFIG_TRACE_IRQFLAGS + ; icause1 needs to be read early, before calling tracing, which + ; can clobber scratch regs, hence use of stack to stash it + push r0 + TRACE_ASM_IRQ_DISABLE + pop r0 +#endif + bl.d @arch_do_IRQ mov r1, sp @@ -337,9 +345,9 @@ ARC_ENTRY EV_TLBProtV ; vineetg: Mar 6th: Random Seg Fault issue #1 ; ecr and efa were not saved in case an Intr sneaks in ; after fake rtie - ; + lr r2, [ecr] - lr r1, [efa] ; Faulting Data address + lr r0, [efa] ; Faulting Data address ; --------(4) Return from CPU Exception Mode --------- ; Fake a rtie, but rtie to next label @@ -348,6 +356,8 @@ ARC_ENTRY EV_TLBProtV FAKE_RET_FROM_EXCPN r9 + mov r1, sp + ;------ (5) Type of Protection Violation? ---------- ; ; ProtV Hardware Exception is triggered for Access Faults of 2 types @@ -358,16 +368,12 @@ ARC_ENTRY EV_TLBProtV bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f ;========= (6a) Access Violation Processing ======== - mov r0, sp ; pt_regs bl do_page_fault b ret_from_exception ;========== (6b) Non aligned access ============ 4: - mov r0, r1 - mov r1, sp ; pt_regs -#ifdef CONFIG_ARC_MISALIGN_ACCESS SAVE_CALLEE_SAVED_USER mov r2, sp ; callee_regs @@ -376,9 +382,6 @@ ARC_ENTRY EV_TLBProtV ; TBD: optimize - do this only if a callee reg was involved ; either a dst of emulated LD/ST or src with address-writeback RESTORE_CALLEE_SAVED_USER -#else - bl do_misaligned_error -#endif b ret_from_exception @@ -575,6 +578,7 @@ resume_user_mode_begin: ; --- (Slow Path #2) pending signal --- mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() + GET_CURR_THR_INFO_FLAGS r9 bbit0 r9, TIF_SIGPENDING, .Lchk_notify_resume ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs @@ -640,6 +644,8 @@ resume_kernel_mode: restore_regs : + TRACE_ASM_IRQ_ENABLE + lr r10, [status32] ; Restore REG File. In case multiple Events outstanding, diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 0f944f024513..2c878e964a64 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -95,7 +95,7 @@ stext: ;---------------------------------------------------------------- ; First lines of code run by secondary before jumping to 'C' ;---------------------------------------------------------------- - .section .init.text, "ax",@progbits + .section .text, "ax",@progbits .type first_lines_of_secondary, @function .globl first_lines_of_secondary diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 5fc92455da36..a4b141ee9a6a 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -39,10 +39,14 @@ void arc_init_IRQ(void) level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; - if (level_mask) { + /* + * Write to register, even if no LV2 IRQs configured to reset it + * in case bootloader had mucked with it + */ + write_aux_reg(AUX_IRQ_LEV, level_mask); + + if (level_mask) pr_info("Level-2 interrupts bitset %x\n", level_mask); - write_aux_reg(AUX_IRQ_LEV, level_mask); - } } /* @@ -146,7 +150,7 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } -int __init get_hw_config_num_irq(void) +int get_hw_config_num_irq(void) { uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR); diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index a7698fb14818..a2ff5c5d1450 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -196,6 +196,18 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) instruction_pointer(regs) = ip; } +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + struct kgdb_arch arch_kgdb_ops = { /* breakpoint instruction: TRAP_S 0x3 */ #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index 72f97822784a..eb1c2ee5eaf0 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -87,13 +87,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; } static inline void __kprobes set_current_kprobe(struct kprobe *p) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static void __kprobes resume_execution(struct kprobe *p, unsigned long addr, @@ -237,7 +237,7 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) return 1; } else if (kprobe_running()) { - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { setup_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; diff --git a/arch/arc/kernel/reset.c b/arch/arc/kernel/reset.c index e227a2b1c943..2768fa1e39b9 100644 --- a/arch/arc/kernel/reset.c +++ b/arch/arc/kernel/reset.c @@ -31,3 +31,4 @@ void machine_power_off(void) } void (*pm_power_off) (void) = NULL; +EXPORT_SYMBOL(pm_power_off); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 2c68bc7e6a78..d9e15f16633e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -37,8 +37,7 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; - -void read_arc_build_cfg_regs(void) +static void read_arc_build_cfg_regs(void) { struct bcr_perip uncached_space; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; @@ -106,7 +105,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) +static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; @@ -171,7 +170,7 @@ static const struct id_to_str mac_mul_nm[] = { {0x6, "Dual 16x16 and 32x16"} }; -char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) +static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; @@ -234,7 +233,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) return buf; } -void arc_chk_ccms(void) +static void arc_chk_ccms(void) { #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM) struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; @@ -269,7 +268,7 @@ void arc_chk_ccms(void) * hardware has dedicated regs which need to be saved/restored on ctx-sw * (Single Precision uses core regs), thus kernel is kind of oblivious to it */ -void arc_chk_fpu(void) +static void arc_chk_fpu(void) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index bca3052c956d..c2f9ebbc38f6 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -95,7 +95,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * If it turns out to be elaborate, it's better to code it in assembly * */ -void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu) +void __weak arc_platform_smp_wait_to_boot(int cpu) { /* * As a hack for debugging - since debugger will single-step over the @@ -128,6 +128,7 @@ void start_kernel_secondary(void) atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_count); current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); notify_cpu_starting(cpu); set_cpu_online(cpu, true); @@ -210,7 +211,6 @@ enum ipi_msg_type { IPI_NOP = 0, IPI_RESCHEDULE = 1, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP }; @@ -254,7 +254,7 @@ void smp_send_stop(void) void arch_send_call_function_single_ipi(int cpu) { - ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -286,10 +286,6 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu) generic_smp_call_function_interrupt(); break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case IPI_CPU_STOP: ipi_cpu_stop(cpu); break; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index f8b7d880304d..9ce47cfe2303 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -237,11 +237,14 @@ unsigned int get_wchan(struct task_struct *tsk) */ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { + /* Assumes @tsk is sleeping so unwinds from __switch_to */ arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace); } void save_stack_trace(struct stack_trace *trace) { - arc_unwind_core(current, NULL, __collect_all, trace); + /* Pass NULL for task so it unwinds the current call frame */ + arc_unwind_core(NULL, NULL, __collect_all, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 3fde7de3ea67..e5f3a837fb35 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -63,9 +63,10 @@ int arc_counter_setup(void) { - /* RTSC insn taps into cpu clk, needs no setup */ - - /* For SMP, only allowed if cross-core-sync, hence usable as cs */ + /* + * For SMP this needs to be 0. However Kconfig glue doesn't + * enable this option for SMP configs + */ return 1; } @@ -206,7 +207,7 @@ static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = { static irqreturn_t timer_irq_handler(int irq, void *dev_id) { - struct clock_event_device *clk = &__get_cpu_var(arc_clockevent_device); + struct clock_event_device *clk = this_cpu_ptr(&arc_clockevent_device); arc_timer_event_ack(clk->mode == CLOCK_EVT_MODE_PERIODIC); clk->event_handler(clk); @@ -223,7 +224,7 @@ static struct irqaction arc_timer_irq = { * Setup the local event timer for @cpu * N.B. weak so that some exotic ARC SoCs can completely override it */ -void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu) +void __weak arc_local_timer_setup(unsigned int cpu) { struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu); diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index e21692d2fdab..3eadfdabc322 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -84,19 +84,18 @@ DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) -#ifdef CONFIG_ARC_MISALIGN_ACCESS /* * Entry Point for Misaligned Data access Exception, for emulating in software */ int do_misaligned_access(unsigned long address, struct pt_regs *regs, struct callee_regs *cregs) { + /* If emulation not enabled, or failed, kill the task */ if (misaligned_fixup(address, regs, cregs) != 0) return do_misaligned_error(address, regs); return 0; } -#endif /* * Entry point for miscll errors such as Nested Exceptions diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 5a1259cd948c..6b58c1de7577 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -182,7 +182,7 @@ void arc_cache_init(void) #ifdef CONFIG_ARC_HAS_ICACHE /* 1. Confirm some of I-cache params which Linux assumes */ - if (ic->line_len != ARC_ICACHE_LINE_LEN) + if (ic->line_len != L1_CACHE_BYTES) panic("Cache H/W doesn't match kernel Config"); if (ic->ver != CONFIG_ARC_MMU_VER) @@ -205,7 +205,7 @@ chk_dc: return; #ifdef CONFIG_ARC_HAS_DCACHE - if (dc->line_len != ARC_DCACHE_LINE_LEN) + if (dc->line_len != L1_CACHE_BYTES) panic("Cache H/W doesn't match kernel Config"); /* check for D-Cache aliasing */ @@ -240,6 +240,67 @@ chk_dc: #define OP_INV 0x1 #define OP_FLUSH 0x2 #define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * Common Helper for Line Operations on {I,D}-Cache + */ +static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + aux_tag = ARC_REG_IC_PTAG; + } + else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_tag = ARC_REG_DC_PTAG; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + +#if (CONFIG_ARC_MMU_VER <= 2) + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; +#else + /* if V-P const for loop, PTAG can be written once outside loop */ + if (full_page_op) + write_aux_reg(ARC_REG_DC_PTAG, paddr); +#endif + + while (num_lines-- > 0) { +#if (CONFIG_ARC_MMU_VER > 2) + /* MMUv3, cache ops require paddr seperately */ + if (!full_page_op) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; +#else + write_aux_reg(aux, paddr); + paddr += L1_CACHE_BYTES; +#endif + } +} #ifdef CONFIG_ARC_HAS_DCACHE @@ -289,53 +350,6 @@ static inline void __dc_entire_op(const int cacheop) write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); } -/* - * Per Line Operation on D-Cache - * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete - * It's sole purpose is to help gcc generate ZOL - * (aliasing VIPT dcache flushing needs both vaddr and paddr) - */ -static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int aux_reg) -{ - int num_lines; - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~DCACHE_LINE_MASK; - paddr &= DCACHE_LINE_MASK; - vaddr &= DCACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif - - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* - * Just as for I$, in MMU v3, D$ ops also require - * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops - */ - write_aux_reg(ARC_REG_DC_PTAG, paddr); - - write_aux_reg(aux_reg, vaddr); - vaddr += ARC_DCACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(aux_reg, paddr); -#endif - paddr += ARC_DCACHE_LINE_LEN; - } -} - /* For kernel mappings cache operation: index is same as paddr */ #define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) @@ -346,7 +360,6 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, unsigned long sz, const int cacheop) { unsigned long flags, tmp = tmp; - int aux; local_irq_save(flags); @@ -361,12 +374,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH); } - if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDL; - else - aux = ARC_REG_DC_FLDL; - - __dc_line_loop(paddr, vaddr, sz, aux); + __cache_line_loop(paddr, vaddr, sz, cacheop); if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ wait_for_flush(); @@ -438,42 +446,9 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; - int num_lines; - - /* - * Ensure we properly floor/ceil the non-line aligned/sized requests: - * However page sized flushes can be compile time optimised. - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~ICACHE_LINE_MASK; - paddr &= ICACHE_LINE_MASK; - vaddr &= ICACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - /* bits 17:13 of vaddr go as bits 4:0 of paddr */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif local_irq_save(flags); - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* tag comes from phy addr */ - write_aux_reg(ARC_REG_IC_PTAG, paddr); - - /* index bits come from vaddr */ - write_aux_reg(ARC_REG_IC_IVIL, vaddr); - vaddr += ARC_ICACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(ARC_REG_IC_IVIL, paddr); -#endif - paddr += ARC_ICACHE_LINE_LEN; - } + __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 0c14d8a52683..9c69552350c4 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -52,7 +52,7 @@ bad_area: return 1; } -void do_page_fault(struct pt_regs *regs, unsigned long address) +void do_page_fault(unsigned long address, struct pt_regs *regs) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 71cb26df4255..e1acf0ce5647 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -100,7 +100,7 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ -unsigned int asid_cache = MM_CTXT_FIRST_CYCLE; +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; /* * Utility Routine to erase a J-TLB entry @@ -274,6 +274,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* If range @start to @end is more than 32 TLB entries deep, @@ -297,9 +298,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, local_irq_save(flags); - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { while (start < end) { - tlb_entry_erase(start | hw_pid(vma->vm_mm)); + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); start += PAGE_SIZE; } } @@ -346,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* Note that it is critical that interrupts are DISABLED between @@ -353,14 +355,87 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) */ local_irq_save(flags); - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { - tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm)); + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); utlb_invalidate(); } local_irq_restore(flags); } +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + /* * Routine to create a TLB entry */ @@ -400,7 +475,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(vma->vm_mm->context.asid, address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); address &= PAGE_MASK; @@ -610,9 +685,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { int set, way, n; - unsigned int pd0[4], pd1[4]; /* assume max 4 ways */ unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int pd0[mmu->ways], pd1[mmu->ways]; local_irq_save(flags); @@ -637,7 +712,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, continue; /* Scan the set for duplicate ways: needs a nested loop */ - for (way = 0; way < mmu->ways; way++) { + for (way = 0; way < mmu->ways - 1; way++) { if (!pd0[way]) continue; diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index cf7d7d9ad695..3fcfdb38d242 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -369,8 +369,8 @@ do_slow_path_pf: EXCEPTION_PROLOGUE ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r0, sp - lr r1, [efa] + mov_s r1, sp + lr r0, [efa] ; We don't want exceptions to be disabled while the fault is handled. ; Now that we have saved the context we return from exception hence |