diff options
author | Linus Torvalds | 2019-07-19 12:15:33 -0700 |
---|---|---|
committer | Linus Torvalds | 2019-07-19 12:15:33 -0700 |
commit | a84d2d2906f983fb80f5dcc3e8e7c3ad70aa9f0d (patch) | |
tree | 26d5ca2e6f2e799d858a18bccaa7c3f42f7d3fe3 | |
parent | b5d72dda8976e878be47415b94bca8465d1fa22d (diff) | |
parent | bdfeb0ccea1a12b58299b95eb0f28e2aa26de4c2 (diff) |
Merge tag 'csky-for-linus-5.3-rc1' of git://github.com/c-sky/csky-linux
Pull arch/csky pupdates from Guo Ren:
"This round of csky subsystem gives two features (ASID algorithm
update, Perf pmu record support) and some fixups.
ASID updates:
- Revert mmu ASID mechanism
- Add new asid lib code from arm
- Use generic asid algorithm to implement switch_mm
- Improve tlb operation with help of asid
Perf pmu record support:
- Init pmu as a device
- Add count-width property for csky pmu
- Add pmu interrupt support
- Fix perf record in kernel/user space
- dt-bindings: Add csky PMU bindings
Fixes:
- Fixup no panic in kernel for some traps
- Fixup some error count in 810 & 860.
- Fixup abiv1 memset error"
* tag 'csky-for-linus-5.3-rc1' of git://github.com/c-sky/csky-linux:
csky: Fixup abiv1 memset error
csky: Improve tlb operation with help of asid
csky: Use generic asid algorithm to implement switch_mm
csky: Add new asid lib code from arm
csky: Revert mmu ASID mechanism
dt-bindings: csky: Add csky PMU bindings
dt-bindings: interrupt-controller: Update csky mpintc
csky: Fixup some error count in 810 & 860.
csky: Fix perf record in kernel/user space
csky: Add pmu interrupt support
csky: Add count-width property for csky pmu
csky: Init pmu as a device
csky: Fixup no panic in kernel for some traps
csky: Select intc & timer drivers
-rw-r--r-- | Documentation/devicetree/bindings/csky/pmu.txt | 38 | ||||
-rw-r--r-- | arch/csky/Kconfig | 4 | ||||
-rw-r--r-- | arch/csky/abiv1/Makefile | 1 | ||||
-rw-r--r-- | arch/csky/abiv1/inc/abi/ckmmu.h | 6 | ||||
-rw-r--r-- | arch/csky/abiv1/inc/abi/string.h | 3 | ||||
-rw-r--r-- | arch/csky/abiv1/memset.c | 37 | ||||
-rw-r--r-- | arch/csky/abiv1/strksyms.c | 1 | ||||
-rw-r--r-- | arch/csky/abiv2/inc/abi/ckmmu.h | 10 | ||||
-rw-r--r-- | arch/csky/include/asm/asid.h | 78 | ||||
-rw-r--r-- | arch/csky/include/asm/mmu.h | 2 | ||||
-rw-r--r-- | arch/csky/include/asm/mmu_context.h | 114 | ||||
-rw-r--r-- | arch/csky/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/csky/kernel/perf_event.c | 410 | ||||
-rw-r--r-- | arch/csky/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/csky/kernel/traps.c | 5 | ||||
-rw-r--r-- | arch/csky/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/csky/mm/asid.c | 189 | ||||
-rw-r--r-- | arch/csky/mm/context.c | 46 | ||||
-rw-r--r-- | arch/csky/mm/init.c | 2 | ||||
-rw-r--r-- | arch/csky/mm/tlb.c | 238 |
20 files changed, 861 insertions, 329 deletions
diff --git a/Documentation/devicetree/bindings/csky/pmu.txt b/Documentation/devicetree/bindings/csky/pmu.txt new file mode 100644 index 000000000000..728d05ca6a1c --- /dev/null +++ b/Documentation/devicetree/bindings/csky/pmu.txt @@ -0,0 +1,38 @@ +=============================== +C-SKY Performance Monitor Units +=============================== + +C-SKY Performance Monitor is designed for ck807/ck810/ck860 SMP soc and +it could count cpu's events for helping analysis performance issues. + +============================ +PMU node bindings definition +============================ + + Description: Describes PMU + + PROPERTIES + + - compatible + Usage: required + Value type: <string> + Definition: must be "csky,csky-pmu" + - interrupts + Usage: required + Value type: <u32 IRQ_TYPE_XXX> + Definition: must be pmu irq num defined by soc + - count-width + Usage: optional + Value type: <u32> + Definition: the width of pmu counter + +Examples: +--------- +#include <dt-bindings/interrupt-controller/irq.h> + + pmu: performace-monitor { + compatible = "csky,csky-pmu"; + interrupts = <23 IRQ_TYPE_EDGE_RISING>; + interrupt-parent = <&intc>; + count-width = <48>; + }; diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index cf798a1628cf..3973847b5f42 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -10,6 +10,9 @@ config CSKY select COMMON_CLK select CLKSRC_MMIO select CLKSRC_OF + select CSKY_MPINTC if CPU_CK860 + select CSKY_MP_TIMER if CPU_CK860 + select CSKY_APB_INTC select DMA_DIRECT_REMAP select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ @@ -30,6 +33,7 @@ config CSKY select GENERIC_IRQ_MULTI_HANDLER select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD + select GX6605S_TIMER if CPU_CK610 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_AUDITSYSCALL select HAVE_DYNAMIC_FTRACE diff --git a/arch/csky/abiv1/Makefile b/arch/csky/abiv1/Makefile index e52b42beac97..601ce3b2fb85 100644 --- a/arch/csky/abiv1/Makefile +++ b/arch/csky/abiv1/Makefile @@ -5,5 +5,4 @@ obj-y += bswapsi.o obj-y += cacheflush.o obj-y += mmap.o obj-y += memcpy.o -obj-y += memset.o obj-y += strksyms.o diff --git a/arch/csky/abiv1/inc/abi/ckmmu.h b/arch/csky/abiv1/inc/abi/ckmmu.h index 81f37715c0d2..ba8eb5870835 100644 --- a/arch/csky/abiv1/inc/abi/ckmmu.h +++ b/arch/csky/abiv1/inc/abi/ckmmu.h @@ -78,6 +78,12 @@ static inline void tlb_invalid_all(void) cpwcr("cpcr8", 0x04000000); } + +static inline void local_tlb_invalid_all(void) +{ + tlb_invalid_all(); +} + static inline void tlb_invalid_indexed(void) { cpwcr("cpcr8", 0x02000000); diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h index 5abe80be044d..0cd43384f8d2 100644 --- a/arch/csky/abiv1/inc/abi/string.h +++ b/arch/csky/abiv1/inc/abi/string.h @@ -7,7 +7,4 @@ #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); -#define __HAVE_ARCH_MEMSET -extern void *memset(void *, int, __kernel_size_t); - #endif /* __ABI_CSKY_STRING_H */ diff --git a/arch/csky/abiv1/memset.c b/arch/csky/abiv1/memset.c deleted file mode 100644 index b4aa75b99c5d..000000000000 --- a/arch/csky/abiv1/memset.c +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include <linux/types.h> - -void *memset(void *dest, int c, size_t l) -{ - char *d = dest; - int ch = c & 0xff; - int tmp = (ch | ch << 8 | ch << 16 | ch << 24); - - while (((uintptr_t)d & 0x3) && l--) - *d++ = ch; - - while (l >= 16) { - *(((u32 *)d)) = tmp; - *(((u32 *)d)+1) = tmp; - *(((u32 *)d)+2) = tmp; - *(((u32 *)d)+3) = tmp; - l -= 16; - d += 16; - } - - while (l > 3) { - *(((u32 *)d)) = tmp; - l -= 4; - d += 4; - } - - while (l) { - *d = ch; - l--; - d++; - } - - return dest; -} diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c index 436995c9b75c..c7ccbb27e8d7 100644 --- a/arch/csky/abiv1/strksyms.c +++ b/arch/csky/abiv1/strksyms.c @@ -4,4 +4,3 @@ #include <linux/module.h> EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); diff --git a/arch/csky/abiv2/inc/abi/ckmmu.h b/arch/csky/abiv2/inc/abi/ckmmu.h index e4480e6bc3b3..73ded7c72482 100644 --- a/arch/csky/abiv2/inc/abi/ckmmu.h +++ b/arch/csky/abiv2/inc/abi/ckmmu.h @@ -85,6 +85,16 @@ static inline void tlb_invalid_all(void) #endif } +static inline void local_tlb_invalid_all(void) +{ +#ifdef CONFIG_CPU_HAS_TLBI + asm volatile("tlbi.all\n":::"memory"); + sync_is(); +#else + tlb_invalid_all(); +#endif +} + static inline void tlb_invalid_indexed(void) { mtcr("cr<8, 15>", 0x02000000); diff --git a/arch/csky/include/asm/asid.h b/arch/csky/include/asm/asid.h new file mode 100644 index 000000000000..ac08b0ffbe1f --- /dev/null +++ b/arch/csky/include/asm/asid.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ASM_ASID_H +#define __ASM_ASM_ASID_H + +#include <linux/atomic.h> +#include <linux/compiler.h> +#include <linux/cpumask.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> + +struct asid_info +{ + atomic64_t generation; + unsigned long *map; + atomic64_t __percpu *active; + u64 __percpu *reserved; + u32 bits; + /* Lock protecting the structure */ + raw_spinlock_t lock; + /* Which CPU requires context flush on next call */ + cpumask_t flush_pending; + /* Number of ASID allocated by context (shift value) */ + unsigned int ctxt_shift; + /* Callback to locally flush the context. */ + void (*flush_cpu_ctxt_cb)(void); +}; + +#define NUM_ASIDS(info) (1UL << ((info)->bits)) +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift) + +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu) + +void asid_new_context(struct asid_info *info, atomic64_t *pasid, + unsigned int cpu, struct mm_struct *mm); + +/* + * Check the ASID is still valid for the context. If not generate a new ASID. + * + * @pasid: Pointer to the current ASID batch + * @cpu: current CPU ID. Must have been acquired throught get_cpu() + */ +static inline void asid_check_context(struct asid_info *info, + atomic64_t *pasid, unsigned int cpu, + struct mm_struct *mm) +{ + u64 asid, old_active_asid; + + asid = atomic64_read(pasid); + + /* + * The memory ordering here is subtle. + * If our active_asid is non-zero and the ASID matches the current + * generation, then we update the active_asid entry with a relaxed + * cmpxchg. Racing with a concurrent rollover means that either: + * + * - We get a zero back from the cmpxchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the cmpxchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. + */ + old_active_asid = atomic64_read(&active_asid(info, cpu)); + if (old_active_asid && + !((asid ^ atomic64_read(&info->generation)) >> info->bits) && + atomic64_cmpxchg_relaxed(&active_asid(info, cpu), + old_active_asid, asid)) + return; + + asid_new_context(info, pasid, cpu, mm); +} + +int asid_allocator_init(struct asid_info *info, + u32 bits, unsigned int asid_per_ctxt, + void (*flush_cpu_ctxt_cb)(void)); + +#endif diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h index cb344675ccc4..b382a14ea4ec 100644 --- a/arch/csky/include/asm/mmu.h +++ b/arch/csky/include/asm/mmu.h @@ -5,7 +5,7 @@ #define __ASM_CSKY_MMU_H typedef struct { - unsigned long asid[NR_CPUS]; + atomic64_t asid; void *vdso; } mm_context_t; diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index 734db3a122e1..0285b0ad18b6 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -16,122 +16,32 @@ #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ setup_pgd(__pa(pgd), false) + #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \ setup_pgd(__pa(pgd), true) -#define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) -#define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) -#define asid_cache(cpu) (cpu_data[cpu].asid_cache) +#define ASID_MASK ((1 << CONFIG_CPU_ASID_BITS) - 1) +#define cpu_asid(mm) (atomic64_read(&mm->context.asid) & ASID_MASK) -#define ASID_FIRST_VERSION (1 << CONFIG_CPU_ASID_BITS) -#define ASID_INC 0x1 -#define ASID_MASK (ASID_FIRST_VERSION - 1) -#define ASID_VERSION_MASK ~ASID_MASK +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.asid, 0); 0; }) +#define activate_mm(prev,next) switch_mm(prev, next, current) #define destroy_context(mm) do {} while (0) #define enter_lazy_tlb(mm, tsk) do {} while (0) #define deactivate_mm(tsk, mm) do {} while (0) -/* - * All unused by hardware upper bits will be considered - * as a software asid extension. - */ -static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) -{ - unsigned long asid = asid_cache(cpu); - - asid += ASID_INC; - if (!(asid & ASID_MASK)) { - flush_tlb_all(); /* start new asid cycle */ - if (!asid) /* fix version if needed */ - asid = ASID_FIRST_VERSION; - } - cpu_context(cpu, mm) = asid_cache(cpu) = asid; -} - -/* - * Initialize the context related info for a new mm_struct - * instance. - */ -static inline int -init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int i; - - for_each_online_cpu(i) - cpu_context(i, mm) = 0; - return 0; -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned int cpu = smp_processor_id(); - unsigned long flags; - - local_irq_save(flags); - /* Check if our ASID is of an older version and thus invalid */ - if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK) - get_new_mmu_context(next, cpu); - write_mmu_entryhi(cpu_asid(cpu, next)); - TLBMISS_HANDLER_SETUP_PGD(next->pgd); - - /* - * Mark current->active_mm as not "active" anymore. - * We don't want to mislead possible IPI tlb flush routines. - */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); - local_irq_restore(flags); -} - -/* - * After we have set current->mm to a new value, this activates - * the context for the new mm so we see the new mappings. - */ static inline void -activate_mm(struct mm_struct *prev, struct mm_struct *next) +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) { - unsigned long flags; - int cpu = smp_processor_id(); - - local_irq_save(flags); + unsigned int cpu = smp_processor_id(); - /* Unconditionally get a new ASID. */ - get_new_mmu_context(next, cpu); + if (prev != next) + check_and_switch_context(next, cpu); - write_mmu_entryhi(cpu_asid(cpu, next)); TLBMISS_HANDLER_SETUP_PGD(next->pgd); - - /* mark mmu ownership change */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); - - local_irq_restore(flags); -} - -/* - * If mm is currently active_mm, we can't really drop it. Instead, - * we will get a new one for it. - */ -static inline void -drop_mmu_context(struct mm_struct *mm, unsigned int cpu) -{ - unsigned long flags; - - local_irq_save(flags); - - if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { - get_new_mmu_context(mm, cpu); - write_mmu_entryhi(cpu_asid(cpu, mm)); - } else { - /* will get a new context next time */ - cpu_context(cpu, mm) = 0; - } - - local_irq_restore(flags); + write_mmu_entryhi(next->context.asid.counter); } - #endif /* __ASM_CSKY_MMU_CONTEXT_H */ diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index dcea277c09ae..c429a6f347de 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -290,8 +290,6 @@ static inline pte_t *pte_offset(pmd_t *dir, unsigned long address) extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); -extern void show_jtlb_table(void); - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte); diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 376c972f5f37..4c1a1934d76a 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -9,17 +9,44 @@ #include <linux/platform_device.h> #define CSKY_PMU_MAX_EVENTS 32 +#define DEFAULT_COUNT_WIDTH 48 + +#define HPCR "<0, 0x0>" /* PMU Control reg */ +#define HPSPR "<0, 0x1>" /* Start PC reg */ +#define HPEPR "<0, 0x2>" /* End PC reg */ +#define HPSIR "<0, 0x3>" /* Soft Counter reg */ +#define HPCNTENR "<0, 0x4>" /* Count Enable reg */ +#define HPINTENR "<0, 0x5>" /* Interrupt Enable reg */ +#define HPOFSR "<0, 0x6>" /* Interrupt Status reg */ + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[CSKY_PMU_MAX_EVENTS]; -#define HPCR "<0, 0x0>" /* PMU Control reg */ -#define HPCNTENR "<0, 0x4>" /* Count Enable reg */ + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)]; +}; static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void); static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val); -struct csky_pmu_t { - struct pmu pmu; - uint32_t hpcr; +static struct csky_pmu_t { + struct pmu pmu; + struct pmu_hw_events __percpu *hw_events; + struct platform_device *plat_device; + uint32_t count_width; + uint32_t hpcr; + u64 max_period; } csky_pmu; +static int csky_pmu_irq; + +#define to_csky_pmu(p) (container_of(p, struct csky_pmu, pmu)) #define cprgr(reg) \ ({ \ @@ -701,6 +728,20 @@ static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = { #define CACHE_OP_UNSUPPORTED 0xffff static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x5, + [C(RESULT_MISS)] = 0x6, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x14, [C(RESULT_MISS)] = 0x15, @@ -710,9 +751,10 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(RESULT_MISS)] = 0x17, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = 0x5, - [C(RESULT_MISS)] = 0x6, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#endif }, [C(L1I)] = { [C(OP_READ)] = { @@ -729,6 +771,20 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }, [C(LL)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x7, + [C(RESULT_MISS)] = 0x8, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x18, [C(RESULT_MISS)] = 0x19, @@ -738,29 +794,48 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(RESULT_MISS)] = 0x1b, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = 0x7, - [C(RESULT_MISS)] = 0x8, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#endif }, [C(DTLB)] = { +#ifdef CONFIG_CPU_CK810 [C(OP_READ)] = { - [C(RESULT_ACCESS)] = 0x5, - [C(RESULT_MISS)] = 0xb, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#else + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x14, + [C(RESULT_MISS)] = 0xb, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x16, + [C(RESULT_MISS)] = 0xb, + }, +#endif [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, [C(ITLB)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x3, [C(RESULT_MISS)] = 0xa, }, +#endif [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, @@ -800,11 +875,57 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; +int csky_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)csky_pmu.max_period) + left = csky_pmu.max_period; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extract future "deltas": + */ + local64_set(&hwc->prev_count, (u64)(-left)); + + if (hw_raw_write_mapping[hwc->idx] != NULL) + hw_raw_write_mapping[hwc->idx]((u64)(-left) & + csky_pmu.max_period); + + cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR)); + + perf_event_update_userpage(event); + + return ret; +} + static void csky_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc) { uint64_t prev_raw_count = local64_read(&hwc->prev_count); - uint64_t new_raw_count = hw_raw_read_mapping[hwc->idx](); + /* + * Sign extend count value to 64bit, otherwise delta calculation + * would be incorrect when overflow occurs. + */ + uint64_t new_raw_count = sign_extend64( + hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1); int64_t delta = new_raw_count - prev_raw_count; /* @@ -816,6 +937,11 @@ static void csky_perf_event_update(struct perf_event *event, local64_sub(delta, &hwc->period_left); } +static void csky_pmu_reset(void *info) +{ + cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); +} + static void csky_pmu_read(struct perf_event *event) { csky_perf_event_update(event, &event->hw); @@ -844,15 +970,6 @@ static int csky_pmu_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int ret; - if (event->attr.exclude_user) - csky_pmu.hpcr = BIT(2); - else if (event->attr.exclude_kernel) - csky_pmu.hpcr = BIT(3); - else - csky_pmu.hpcr = BIT(2) | BIT(3); - - csky_pmu.hpcr |= BIT(1) | BIT(0); - switch (event->attr.type) { case PERF_TYPE_HARDWARE: if (event->attr.config >= PERF_COUNT_HW_MAX) @@ -861,21 +978,32 @@ static int csky_pmu_event_init(struct perf_event *event) if (ret == HW_OP_UNSUPPORTED) return -ENOENT; hwc->idx = ret; - return 0; + break; case PERF_TYPE_HW_CACHE: ret = csky_pmu_cache_event(event->attr.config); if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; hwc->idx = ret; - return 0; + break; case PERF_TYPE_RAW: if (hw_raw_read_mapping[event->attr.config] == NULL) return -ENOENT; hwc->idx = event->attr.config; - return 0; + break; default: return -ENOENT; } + + if (event->attr.exclude_user) + csky_pmu.hpcr = BIT(2); + else if (event->attr.exclude_kernel) + csky_pmu.hpcr = BIT(3); + else + csky_pmu.hpcr = BIT(2) | BIT(3); + + csky_pmu.hpcr |= BIT(1) | BIT(0); + + return 0; } /* starts all counters */ @@ -892,6 +1020,7 @@ static void csky_pmu_disable(struct pmu *pmu) static void csky_pmu_start(struct perf_event *event, int flags) { + unsigned long flg; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -903,16 +1032,34 @@ static void csky_pmu_start(struct perf_event *event, int flags) hwc->state = 0; + csky_pmu_event_set_period(event); + + local_irq_save(flg); + + cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR)); cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR)); + + local_irq_restore(flg); } -static void csky_pmu_stop(struct perf_event *event, int flags) +static void csky_pmu_stop_event(struct perf_event *event) { + unsigned long flg; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + local_irq_save(flg); + + cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR)); + cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); + + local_irq_restore(flg); +} + +static void csky_pmu_stop(struct perf_event *event, int flags) +{ if (!(event->hw.state & PERF_HES_STOPPED)) { - cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); + csky_pmu_stop_event(event); event->hw.state |= PERF_HES_STOPPED; } @@ -925,22 +1072,26 @@ static void csky_pmu_stop(struct perf_event *event, int flags) static void csky_pmu_del(struct perf_event *event, int flags) { + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); + struct hw_perf_event *hwc = &event->hw; + csky_pmu_stop(event, PERF_EF_UPDATE); + hw_events->events[hwc->idx] = NULL; + perf_event_update_userpage(event); } /* allocate hardware counter and optionally start counting */ static int csky_pmu_add(struct perf_event *event, int flags) { + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); struct hw_perf_event *hwc = &event->hw; - local64_set(&hwc->prev_count, 0); - - if (hw_raw_write_mapping[hwc->idx] != NULL) - hw_raw_write_mapping[hwc->idx](0); + hw_events->events[hwc->idx] = event; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) csky_pmu_start(event, PERF_EF_RELOAD); @@ -949,8 +1100,110 @@ static int csky_pmu_add(struct perf_event *event, int flags) return 0; } -int __init init_hw_perf_events(void) +static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev) +{ + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events); + struct pt_regs *regs; + int idx; + + /* + * Did an overflow occur? + */ + if (!cprcr(HPOFSR)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + csky_pmu_disable(&csky_pmu.pmu); + + for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!(cprcr(HPOFSR) & BIT(idx))) + continue; + + hwc = &event->hw; + csky_perf_event_update(event, &event->hw); + perf_sample_data_init(&data, 0, hwc->last_period); + csky_pmu_event_set_period(event); + + if (perf_event_overflow(event, &data, regs)) + csky_pmu_stop_event(event); + } + + csky_pmu_enable(&csky_pmu.pmu); + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static int csky_pmu_request_irq(irq_handler_t handler) { + int err, irqs; + struct platform_device *pmu_device = csky_pmu.plat_device; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_err("no irqs for PMUs defined\n"); + return -ENODEV; + } + + csky_pmu_irq = platform_get_irq(pmu_device, 0); + if (csky_pmu_irq < 0) + return -ENODEV; + err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu", + this_cpu_ptr(csky_pmu.hw_events)); + if (err) { + pr_err("unable to request IRQ%d for CSKY PMU counters\n", + csky_pmu_irq); + return err; + } + + return 0; +} + +static void csky_pmu_free_irq(void) +{ + int irq; + struct platform_device *pmu_device = csky_pmu.plat_device; + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0) + free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events)); +} + +int init_hw_perf_events(void) +{ + csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events, + GFP_KERNEL); + if (!csky_pmu.hw_events) { + pr_info("failed to allocate per-cpu PMU data.\n"); + return -ENOMEM; + } + csky_pmu.pmu = (struct pmu) { .pmu_enable = csky_pmu_enable, .pmu_disable = csky_pmu_disable, @@ -1022,10 +1275,97 @@ int __init init_hw_perf_events(void) hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac; hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc; - csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + return 0; +} - cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); +static int csky_pmu_starting_cpu(unsigned int cpu) +{ + enable_percpu_irq(csky_pmu_irq, 0); + return 0; +} - return perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); +static int csky_pmu_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(csky_pmu_irq); + return 0; } -arch_initcall(init_hw_perf_events); + +int csky_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table) +{ + struct device_node *node = pdev->dev.of_node; + int ret; + + ret = init_hw_perf_events(); + if (ret) { + pr_notice("[perf] failed to probe PMU!\n"); + return ret; + } + + if (of_property_read_u32(node, "count-width", + &csky_pmu.count_width)) { + csky_pmu.count_width = DEFAULT_COUNT_WIDTH; + } + csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; + + csky_pmu.plat_device = pdev; + + /* Ensure the PMU has sane values out of reset. */ + on_each_cpu(csky_pmu_reset, &csky_pmu, 1); + + ret = csky_pmu_request_irq(csky_pmu_handle_irq); + if (ret) { + csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pr_notice("[perf] PMU request irq fail!\n"); + } + + ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE", + csky_pmu_starting_cpu, + csky_pmu_dying_cpu); + if (ret) { + csky_pmu_free_irq(); + free_percpu(csky_pmu.hw_events); + return ret; + } + + ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); + if (ret) { + csky_pmu_free_irq(); + free_percpu(csky_pmu.hw_events); + } + + return ret; +} + +const static struct of_device_id csky_pmu_of_device_ids[] = { + {.compatible = "csky,csky-pmu"}, + {}, +}; + +static int csky_pmu_dev_probe(struct platform_device *pdev) +{ + return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids); +} + +static struct platform_driver csky_pmu_driver = { + .driver = { + .name = "csky-pmu", + .of_match_table = csky_pmu_of_device_ids, + }, + .probe = csky_pmu_dev_probe, +}; + +static int __init csky_pmu_probe(void) +{ + int ret; + + ret = platform_driver_register(&csky_pmu_driver); + if (ret) + pr_notice("[perf] PMU initialization failed\n"); + else + pr_notice("[perf] PMU initialization done\n"); + + return ret; +} + +device_initcall(csky_pmu_probe); diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b07a534b3062..b753d382e4ce 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -212,8 +212,6 @@ void csky_start_secondary(void) TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir); TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir); - asid_cache(smp_processor_id()) = ASID_FIRST_VERSION; - #ifdef CONFIG_CPU_HAS_FPU init_fpu(); #endif diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index 2792e9601ac5..b057480e7463 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -120,6 +120,7 @@ asmlinkage void trap_c(struct pt_regs *regs) switch (vector) { case VEC_ZERODIV: + die_if_kernel("Kernel mode ZERO DIV", regs, vector); sig = SIGFPE; break; /* ptrace */ @@ -128,6 +129,7 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ILLEGAL: + die_if_kernel("Kernel mode ILLEGAL", regs, vector); #ifndef CONFIG_CPU_NO_USER_BKPT if (*(uint16_t *)instruction_pointer(regs) != USR_BKPT) #endif @@ -139,6 +141,7 @@ asmlinkage void trap_c(struct pt_regs *regs) case VEC_TRAP1: /* jtagserver breakpoint */ case VEC_BREAKPOINT: + die_if_kernel("Kernel mode BKPT", regs, vector); info.si_code = TRAP_BRKPT; sig = SIGTRAP; break; @@ -150,8 +153,10 @@ asmlinkage void trap_c(struct pt_regs *regs) #endif #ifdef CONFIG_CPU_HAS_FPU case VEC_FPE: + die_if_kernel("Kernel mode FPE", regs, vector); return fpu_fpe(regs); case VEC_PRIV: + die_if_kernel("Kernel mode PRIV", regs, vector); if (fpu_libc_helper(regs)) return; #endif diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index 4eebebdcd1bf..c94ef6481098 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -12,3 +12,5 @@ obj-y += init.o obj-y += ioremap.o obj-y += syscache.o obj-y += tlb.o +obj-y += asid.o +obj-y += context.o diff --git a/arch/csky/mm/asid.c b/arch/csky/mm/asid.c new file mode 100644 index 000000000000..b2e914745c1d --- /dev/null +++ b/arch/csky/mm/asid.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic ASID allocator. + * + * Based on arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Ltd. + */ + +#include <linux/slab.h> +#include <linux/mm_types.h> + +#include <asm/asid.h> + +#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu) + +#define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0)) +#define ASID_FIRST_VERSION(info) (1UL << ((info)->bits)) + +#define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift) +#define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info)) + +static void flush_context(struct asid_info *info) +{ + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info)); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = reserved_asid(info, i); + __set_bit(asid2idx(info, asid), info->map); + reserved_asid(info, i) = asid; + } + + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ + cpumask_setall(&info->flush_pending); +} + +static bool check_update_reserved_asid(struct asid_info *info, u64 asid, + u64 newasid) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved ASIDs looking for a match. + * If we find one, then we can update our mm to use newasid + * (i.e. the same ASID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old ASID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved ASID in a future + * generation. + */ + for_each_possible_cpu(cpu) { + if (reserved_asid(info, cpu) == asid) { + hit = true; + reserved_asid(info, cpu) = newasid; + } + } + + return hit; +} + +static u64 new_context(struct asid_info *info, atomic64_t *pasid, + struct mm_struct *mm) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(pasid); + u64 generation = atomic64_read(&info->generation); + + if (asid != 0) { + u64 newasid = generation | (asid & ~ASID_MASK(info)); + + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_asid(info, asid, newasid)) + return newasid; + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + if (!__test_and_set_bit(asid2idx(info, asid), info->map)) + return newasid; + } + + /* + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. + */ + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx); + if (asid != NUM_CTXT_ASIDS(info)) + goto set_asid; + + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info), + &info->generation); + flush_context(info); + + /* We have more ASIDs than CPUs, so this will always succeed */ + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1); + +set_asid: + __set_bit(asid, info->map); + cur_idx = asid; + cpumask_clear(mm_cpumask(mm)); + return idx2asid(info, asid) | generation; +} + +/* + * Generate a new ASID for the context. + * + * @pasid: Pointer to the current ASID batch allocated. It will be updated + * with the new ASID batch. + * @cpu: current CPU ID. Must have been acquired through get_cpu() + */ +void asid_new_context(struct asid_info *info, atomic64_t *pasid, + unsigned int cpu, struct mm_struct *mm) +{ + unsigned long flags; + u64 asid; + + raw_spin_lock_irqsave(&info->lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(pasid); + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) { + asid = new_context(info, pasid, mm); + atomic64_set(pasid, asid); + } + + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending)) + info->flush_cpu_ctxt_cb(); + + atomic64_set(&active_asid(info, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&info->lock, flags); +} + +/* + * Initialize the ASID allocator + * + * @info: Pointer to the asid allocator structure + * @bits: Number of ASIDs available + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are + * allocated contiguously for a given context. This value should be a power of + * 2. + */ +int asid_allocator_init(struct asid_info *info, + u32 bits, unsigned int asid_per_ctxt, + void (*flush_cpu_ctxt_cb)(void)) +{ + info->bits = bits; + info->ctxt_shift = ilog2(asid_per_ctxt); + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb; + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is always reserved. + */ + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus()); + atomic64_set(&info->generation, ASID_FIRST_VERSION(info)); + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)), + sizeof(*info->map), GFP_KERNEL); + if (!info->map) + return -ENOMEM; + + raw_spin_lock_init(&info->lock); + + return 0; +} diff --git a/arch/csky/mm/context.c b/arch/csky/mm/context.c new file mode 100644 index 000000000000..0d95bdd93846 --- /dev/null +++ b/arch/csky/mm/context.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/bitops.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include <asm/asid.h> +#include <asm/mmu_context.h> +#include <asm/smp.h> +#include <asm/tlbflush.h> + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); + +struct asid_info asid_info; + +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) +{ + asid_check_context(&asid_info, &mm->context.asid, cpu, mm); +} + +static void asid_flush_cpu_ctxt(void) +{ + local_tlb_invalid_all(); +} + +static int asids_init(void) +{ + BUG_ON(((1 << CONFIG_CPU_ASID_BITS) - 1) <= num_possible_cpus()); + + if (asid_allocator_init(&asid_info, CONFIG_CPU_ASID_BITS, 1, + asid_flush_cpu_ctxt)) + panic("Unable to initialize ASID allocator for %lu ASIDs\n", + NUM_ASIDS(&asid_info)); + + asid_info.active = &active_asids; + asid_info.reserved = &reserved_asids; + + pr_info("ASID allocator initialised with %lu entries\n", + NUM_CTXT_ASIDS(&asid_info)); + + return 0; +} +early_initcall(asids_init); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index 66e597053488..eb0dc9e5065f 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -114,8 +114,6 @@ void __init pre_mmu_init(void) TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir); TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir); - asid_cache(smp_processor_id()) = ASID_FIRST_VERSION; - /* Setup page mask to 4k */ write_mmu_pagemask(0); } diff --git a/arch/csky/mm/tlb.c b/arch/csky/mm/tlb.c index 08b8394e5b8f..eb3ba6c9c927 100644 --- a/arch/csky/mm/tlb.c +++ b/arch/csky/mm/tlb.c @@ -10,7 +10,12 @@ #include <asm/pgtable.h> #include <asm/setup.h> -#define CSKY_TLB_SIZE CONFIG_CPU_TLB_SIZE +/* + * One C-SKY MMU TLB entry contain two PFN/page entry, ie: + * 1VPN -> 2PFN + */ +#define TLB_ENTRY_SIZE (PAGE_SIZE * 2) +#define TLB_ENTRY_SIZE_MASK (PAGE_MASK << 1) void flush_tlb_all(void) { @@ -19,201 +24,148 @@ void flush_tlb_all(void) void flush_tlb_mm(struct mm_struct *mm) { - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) - drop_mmu_context(mm, cpu); - +#ifdef CONFIG_CPU_HAS_TLBI + asm volatile("tlbi.asids %0"::"r"(cpu_asid(mm))); +#else tlb_invalid_all(); +#endif } +/* + * MMU operation regs only could invalid tlb entry in jtlb and we + * need change asid field to invalid I-utlb & D-utlb. + */ +#ifndef CONFIG_CPU_HAS_TLBI #define restore_asid_inv_utlb(oldpid, newpid) \ do { \ - if ((oldpid & ASID_MASK) == newpid) \ + if (oldpid == newpid) \ write_mmu_entryhi(oldpid + 1); \ write_mmu_entryhi(oldpid); \ } while (0) +#endif void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) + unsigned long end) { - struct mm_struct *mm = vma->vm_mm; - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) { - unsigned long size, flags; - int newpid = cpu_asid(cpu, mm); - - local_irq_save(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - if (size <= CSKY_TLB_SIZE/2) { - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); -#ifdef CONFIG_CPU_HAS_TLBI - while (start < end) { - asm volatile("tlbi.vaas %0" - ::"r"(start | newpid)); - start += (PAGE_SIZE << 1); - } - sync_is(); -#else - { - int oldpid = read_mmu_entryhi(); - - while (start < end) { - int idx; - - write_mmu_entryhi(start | newpid); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_mmu_index(); - if (idx >= 0) - tlb_invalid_indexed(); - } - restore_asid_inv_utlb(oldpid, newpid); - } -#endif - } else { - drop_mmu_context(mm, cpu); - } - local_irq_restore(flags); - } -} + unsigned long newpid = cpu_asid(vma->vm_mm); -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - unsigned long size, flags; + start &= TLB_ENTRY_SIZE_MASK; + end += TLB_ENTRY_SIZE - 1; + end &= TLB_ENTRY_SIZE_MASK; - local_irq_save(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (size <= CSKY_TLB_SIZE) { - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); #ifdef CONFIG_CPU_HAS_TLBI - while (start < end) { - asm volatile("tlbi.vaas %0"::"r"(start)); - start += (PAGE_SIZE << 1); - } - sync_is(); -#else - { - int oldpid = read_mmu_entryhi(); - - while (start < end) { - int idx; - - write_mmu_entryhi(start); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_mmu_index(); - if (idx >= 0) - tlb_invalid_indexed(); - } - restore_asid_inv_utlb(oldpid, 0); - } -#endif - } else { - flush_tlb_all(); + while (start < end) { + asm volatile("tlbi.vas %0"::"r"(start | newpid)); + start += 2*PAGE_SIZE; } + sync_is(); +#else + { + unsigned long flags, oldpid; + + local_irq_save(flags); + oldpid = read_mmu_entryhi() & ASID_MASK; + while (start < end) { + int idx; + write_mmu_entryhi(start | newpid); + start += 2*PAGE_SIZE; + tlb_probe(); + idx = read_mmu_index(); + if (idx >= 0) + tlb_invalid_indexed(); + } + restore_asid_inv_utlb(oldpid, newpid); local_irq_restore(flags); + } +#endif } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - int cpu = smp_processor_id(); - int newpid = cpu_asid(cpu, vma->vm_mm); - - if (!vma || cpu_context(cpu, vma->vm_mm) != 0) { - page &= (PAGE_MASK << 1); + start &= TLB_ENTRY_SIZE_MASK; + end += TLB_ENTRY_SIZE - 1; + end &= TLB_ENTRY_SIZE_MASK; #ifdef CONFIG_CPU_HAS_TLBI - asm volatile("tlbi.vaas %0"::"r"(page | newpid)); - sync_is(); + while (start < end) { + asm volatile("tlbi.vaas %0"::"r"(start)); + start += 2*PAGE_SIZE; + } + sync_is(); #else - { - int oldpid, idx; - unsigned long flags; + { + unsigned long flags, oldpid; - local_irq_save(flags); - oldpid = read_mmu_entryhi(); - write_mmu_entryhi(page | newpid); + local_irq_save(flags); + oldpid = read_mmu_entryhi() & ASID_MASK; + while (start < end) { + int idx; + + write_mmu_entryhi(start | oldpid); + start += 2*PAGE_SIZE; tlb_probe(); idx = read_mmu_index(); if (idx >= 0) tlb_invalid_indexed(); - - restore_asid_inv_utlb(oldpid, newpid); - local_irq_restore(flags); - } -#endif } + restore_asid_inv_utlb(oldpid, oldpid); + local_irq_restore(flags); + } +#endif } -/* - * Remove one kernel space TLB entry. This entry is assumed to be marked - * global so we don't do the ASID thing. - */ -void flush_tlb_one(unsigned long page) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - int oldpid; + int newpid = cpu_asid(vma->vm_mm); - oldpid = read_mmu_entryhi(); - page &= (PAGE_MASK << 1); + addr &= TLB_ENTRY_SIZE_MASK; #ifdef CONFIG_CPU_HAS_TLBI - page = page | (oldpid & 0xfff); - asm volatile("tlbi.vaas %0"::"r"(page)); + asm volatile("tlbi.vas %0"::"r"(addr | newpid)); sync_is(); #else { - int idx; + int oldpid, idx; unsigned long flags; - page = page | (oldpid & 0xff); - local_irq_save(flags); - write_mmu_entryhi(page); + oldpid = read_mmu_entryhi() & ASID_MASK; + write_mmu_entryhi(addr | newpid); tlb_probe(); idx = read_mmu_index(); if (idx >= 0) tlb_invalid_indexed(); - restore_asid_inv_utlb(oldpid, oldpid); + + restore_asid_inv_utlb(oldpid, newpid); local_irq_restore(flags); } #endif } -EXPORT_SYMBOL(flush_tlb_one); -/* show current 32 jtlbs */ -void show_jtlb_table(void) +void flush_tlb_one(unsigned long addr) { + addr &= TLB_ENTRY_SIZE_MASK; + +#ifdef CONFIG_CPU_HAS_TLBI + asm volatile("tlbi.vaas %0"::"r"(addr)); + sync_is(); +#else + { + int oldpid, idx; unsigned long flags; - int entryhi, entrylo0, entrylo1; - int entry; - int oldpid; local_irq_save(flags); - entry = 0; - pr_info("\n\n\n"); - - oldpid = read_mmu_entryhi(); - while (entry < CSKY_TLB_SIZE) { - write_mmu_index(entry); - tlb_read(); - entryhi = read_mmu_entryhi(); - entrylo0 = read_mmu_entrylo0(); - entrylo0 = entrylo0; - entrylo1 = read_mmu_entrylo1(); - entrylo1 = entrylo1; - pr_info("jtlb[%d]: entryhi - 0x%x; entrylo0 - 0x%x;" - " entrylo1 - 0x%x\n", - entry, entryhi, entrylo0, entrylo1); - entry++; - } - write_mmu_entryhi(oldpid); + oldpid = read_mmu_entryhi() & ASID_MASK; + write_mmu_entryhi(addr | oldpid); + tlb_probe(); + idx = read_mmu_index(); + if (idx >= 0) + tlb_invalid_indexed(); + + restore_asid_inv_utlb(oldpid, oldpid); local_irq_restore(flags); + } +#endif } +EXPORT_SYMBOL(flush_tlb_one); |