Diffstat (limited to 'include')
-rw-r--r--   include/asm-sparc64/mmu.h          |   1
-rw-r--r--   include/asm-sparc64/mmu_context.h  |  46
-rw-r--r--   include/asm-sparc64/pgalloc.h      |   1
-rw-r--r--   include/asm-sparc64/pgtable.h      |   9
-rw-r--r--   include/asm-sparc64/processor.h    |  14
-rw-r--r--   include/asm-sparc64/tlbflush.h     |  25
-rw-r--r--   include/asm-sparc64/tsb.h          | 165
7 files changed, 204 insertions(+), 57 deletions(-)
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..36384cf7faa6 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -92,6 +92,7 @@
typedef struct {
unsigned long sparc64_ctx_val;
+ unsigned long *sparc64_tsb;
} mm_context_t;
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..34640a370ab4 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -25,7 +25,13 @@ extern void get_new_mmu_context(struct mm_struct *mm);
* This just needs to set mm->context to an invalid context.
*/
#define init_new_context(__tsk, __mm) \
- (((__mm)->context.sparc64_ctx_val = 0UL), 0)
+({ unsigned long __pg = get_zeroed_page(GFP_KERNEL); \
+ (__mm)->context.sparc64_ctx_val = 0UL; \
+ (__mm)->context.sparc64_tsb = \
+ (unsigned long *) __pg; \
+ (__pg ? 0 : -ENOMEM); \
+})
+
/* Destroy a dead context. This occurs when mmput drops the
* mm_users count to zero, the mmaps have been released, and
@@ -35,7 +41,8 @@ extern void get_new_mmu_context(struct mm_struct *mm);
* this task if valid.
*/
#define destroy_context(__mm) \
-do { spin_lock(&ctx_alloc_lock); \
+do { free_page((unsigned long)(__mm)->context.sparc64_tsb); \
+ spin_lock(&ctx_alloc_lock); \
if (CTX_VALID((__mm)->context)) { \
unsigned long nr = CTX_NRBITS((__mm)->context); \
mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
@@ -43,35 +50,7 @@ do { spin_lock(&ctx_alloc_lock); \
spin_unlock(&ctx_alloc_lock); \
} while(0)
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64. They are:
- * 1) The physical address of mm->pgd, when full page
- * table walks are necessary, this is where the
- * search begins.
- * 2) A "PGD cache". For 32-bit tasks only pgd[0] is
- * ever used since that maps the entire low 4GB
- * completely. To speed up TLB miss processing we
- * make this value available to the handlers. This
- * decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
- register unsigned long paddr asm("o5"); \
- register unsigned long pgd_cache asm("o4"); \
- paddr = __pa((__mm)->pgd); \
- pgd_cache = 0UL; \
- if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
- pgd_cache = get_pgd_cache((__mm)->pgd); \
- __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \
- "mov %3, %%g4\n\t" \
- "mov %0, %%g7\n\t" \
- "stxa %1, [%%g4] %2\n\t" \
- "membar #Sync\n\t" \
- "wrpr %%g0, 0x096, %%pstate" \
- : /* no outputs */ \
- : "r" (paddr), "r" (pgd_cache),\
- "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern unsigned long tsb_context_switch(unsigned long pgd_pa, unsigned long *tsb);
/* Set MMU context in the actual hardware. */
#define load_secondary_context(__mm) \
@@ -101,7 +80,8 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
if (!ctx_valid || (old_mm != mm)) {
load_secondary_context(mm);
- reload_tlbmiss_state(tsk, mm);
+ tsb_context_switch(__pa(mm->pgd),
+ mm->context.sparc64_tsb);
}
/* Even if (mm == old_mm) we _must_ check
@@ -139,7 +119,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
- reload_tlbmiss_state(current, mm);
+ tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb);
}
#endif /* !(__ASSEMBLY__) */
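(Taken together, the init_new_context()/destroy_context() changes above amount to roughly the following C. This is an illustrative sketch, not code from the patch: the *_sketch function names are invented, and the bodies simply mirror the macros. A zeroed page is allocated for the per-process TSB at mm creation and freed before the context number is returned to the bitmap.)

	static int init_new_context_sketch(struct task_struct *tsk,
					   struct mm_struct *mm)
	{
		unsigned long page = get_zeroed_page(GFP_KERNEL);

		mm->context.sparc64_ctx_val = 0UL;
		mm->context.sparc64_tsb = (unsigned long *) page;

		return page ? 0 : -ENOMEM;
	}

	static void destroy_context_sketch(struct mm_struct *mm)
	{
		free_page((unsigned long) mm->context.sparc64_tsb);

		spin_lock(&ctx_alloc_lock);
		if (CTX_VALID(mm->context)) {
			unsigned long nr = CTX_NRBITS(mm->context);

			mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
		}
		spin_unlock(&ctx_alloc_lock);
	}

On each address-space switch, tsb_context_switch() then hands the MMU both the physical address of mm->pgd and the TSB base, replacing the old reload_tlbmiss_state() wrpr/stxa sequence removed above.
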
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..baf59c00ea47 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -61,6 +61,7 @@ static __inline__ void free_pgd_slow(pgd_t *pgd)
free_page((unsigned long)pgd);
}
+/* XXX This crap can die, no longer using virtual page tables... */
#ifdef DCACHE_ALIASING_POSSIBLE
#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL)
#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..f3ba1e058195 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
#include <asm/const.h>
/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
* The PROM resides in an area spanning 0xf0000000 to 0x100000000.
* The vmalloc area spans 0x100000000 to 0x200000000.
* Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
* 0x400000000.
*/
#define TLBTEMP_BASE _AC(0x0000000002000000,UL)
+#define TSBMAP_BASE _AC(0x0000000004000000,UL)
#define MODULES_VADDR _AC(0x0000000010000000,UL)
#define MODULES_LEN _AC(0x00000000e0000000,UL)
#define MODULES_END _AC(0x00000000f0000000,UL)
@@ -296,11 +298,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11)
-
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
((pmd_t *) pud_page(*(pudp)) + \
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..b3889f3f943a 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
* User lives in his very own context, and cannot reference us. Note
* that TASK_SIZE is a misnomer, it really gives maximum user virtual
* address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
*/
#define VA_BITS 44
#ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
#endif
#define TASK_SIZE ((unsigned long)-VPTE_SIZE)
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base. And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH 0xffe0000000000000
-#endif
-
#ifndef __ASSEMBLY__
typedef struct {
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
#include <linux/mm.h>
#include <asm/mmu_context.h>
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
/* TLB flush operations. */
extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
#define flush_tlb_page(vma,addr) flush_tlb_pending()
#define flush_tlb_mm(mm) flush_tlb_pending()
+/* Local cpu only. */
extern void __flush_tlb_all(void);
+
extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifndef CONFIG_SMP
-#define flush_tlb_all() __flush_tlb_all()
#define flush_tlb_kernel_range(start,end) \
- __flush_tlb_kernel_range(start,end)
+do { flush_tsb_kernel_range(start,end); \
+ __flush_tlb_kernel_range(start,end); \
+} while (0)
#else /* CONFIG_SMP */
-extern void smp_flush_tlb_all(void);
extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb_all() smp_flush_tlb_all()
#define flush_tlb_kernel_range(start, end) \
- smp_flush_tlb_kernel_range(start, end)
+do { flush_tsb_kernel_range(start,end); \
+ smp_flush_tlb_kernel_range(start, end); \
+} while (0)
#endif /* ! CONFIG_SMP */
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* We don't use virtual page tables for TLB miss processing
+ * any more. Nowadays we use the TSB.
+ */
+}
#endif /* _SPARC64_TLBFLUSH_H */
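(The kernel-range TLB flush now invalidates the matching kernel TSB entries first. flush_tsb_kernel_range() itself is defined outside this header; given the kernel TSB layout assumed by KERN_TSB_LOOKUP_TL1 in tsb.h below — swapper_tsb, 512 slots of 16 bytes, indexed by vaddr >> 13, zero tag meaning "never matches" — a sketch of what it has to do looks roughly like this. The struct layout, names, and body are assumptions, not the patch's implementation; unconditionally zeroing the indexed tag merely over-invalidates, which is harmless.)

	struct tsb_slot_sketch {		/* hypothetical view of one slot */
		unsigned long tag;		/* 0 == invalid, never matches */
		unsigned long pte;
	};
	extern struct tsb_slot_sketch swapper_tsb[512];

	static void flush_tsb_kernel_range_sketch(unsigned long start,
						  unsigned long end)
	{
		unsigned long v;

		for (v = start; v < end; v += (1UL << 13)) {
			struct tsb_slot_sketch *slot =
				&swapper_tsb[(v >> 13) & (512 - 1)];

			slot->tag = 0UL;	/* invalidate this slot */
		}
	}
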
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..03d272e0e477
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,165 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables. It's a
+ * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ * ldxa [%g0] ASI_{D,I}MMU, %g6
+ * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ * cmp %g4, %g6
+ * bne,pn %xcc, tsb_miss_{d,i}tlb
+ * mov FAULT_CODE_{D,I}TLB, %g3
+ * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ * retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * | - | CONTEXT | - | VADDR bits 63:22 |
+ * -------------------------------------------------
+ * 63 61 60 48 47 42 41 0
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries. PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bit for the TSB entry. We
+ * choose to use bit 47 in the tag. Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ *
+ * Currently, we allocate an 8K TSB per-process and we use it for both
+ * I-TLB and D-TLB misses. Perhaps at some point we'll add code that
+ * monitors the number of active pages in the process as we get
+ * major/minor faults, and grow the TSB in response. The only trick
+ * in implementing that is synchronizing the freeing of the old TSB
+ * wrt. parallel TSB updates occurring on other processors. One
+ * possible solution is to use RCU for the freeing of the TSB.
+ */
+
+#define TSB_TAG_LOCK (1 << (47 - 32))
+
+#define TSB_MEMBAR membar #StoreStore
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2) \
+99: lduwa [TSB] ASI_N, REG1; \
+ sethi %hi(TSB_TAG_LOCK), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ casa [TSB] ASI_N, REG1, REG2;\
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+ stx TTE, [TSB + 0x08]; \
+ TSB_MEMBAR; \
+ stx TAG, [TSB + 0x00];
+
+ /* Do a kernel page table walk. Leaves physical PTE pointer in
+ * REG1. Jumps to FAIL_LABEL on early page table walk termination.
+ * VADDR will not be clobbered, but REG2 will.
+ */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
+ sethi %hi(swapper_pg_dir), REG1; \
+ or REG1, %lo(swapper_pg_dir), REG1; \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduw [REG1 + REG2], REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+ /* Do a user page table walk in MMU globals. Leaves physical PTE
+ * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
+ * termination. Physical base of page tables is in PHYS_PGD which
+ * will not be modified.
+ *
+ * VADDR will not be clobbered, but REG1 and REG2 will.
+ */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+/* Look up an OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to. On success
+ * the resulting PTE value will be left in REG1. VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+ sethi %hi(prom_trans), REG1; \
+ or REG1, %lo(prom_trans), REG1; \
+97: ldx [REG1 + 0x00], REG2; \
+ brz,pn REG2, FAIL_LABEL; \
+ nop; \
+ ldx [REG1 + 0x08], REG3; \
+ add REG2, REG3, REG3; \
+ cmp REG2, VADDR; \
+ bgu,pt %xcc, 98f; \
+ cmp VADDR, REG3; \
+ bgeu,pt %xcc, 98f; \
+ ldx [REG1 + 0x10], REG3; \
+ sub VADDR, REG2, REG2; \
+ ba,pt %xcc, 99f; \
+ add REG3, REG2, REG1; \
+98: ba,pt %xcc, 97b; \
+ add REG1, (3 * 8), REG1; \
+99:
+
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+ * be an even/odd pair of registers.
+ *
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+ /* XXX non-8K base page size support... */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_tsb), REG1; \
+ or REG1, %lo(swapper_tsb), REG1; \
+ srlx VADDR, 13, REG2; \
+ and REG2, (512 - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ ldda [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */
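
(To make the assembler protocol above concrete, here is a C-level rendering of how an updater forms a TAG TARGET-style tag and uses the bit-47 lock, mirroring TSB_LOCK_TAG and TSB_WRITE. It is a sketch only: the kernel does this in assembly, the struct and function names are invented, and cmpxchg()/smp_wmb() stand in for the casa and TSB_MEMBAR used by the macros.)

	struct tsb_ent_sketch {
		unsigned long tag;		/* bit 47 is TSB_TAG_LOCK */
		unsigned long pte;
	};

	static unsigned long tsb_make_tag(unsigned long ctx, unsigned long vaddr)
	{
		/* TAG TARGET layout: CONTEXT in bits 60:48,
		 * VADDR bits 63:22 in tag bits 41:0.
		 */
		return (ctx << 48) | (vaddr >> 22);
	}

	static void tsb_insert_sketch(struct tsb_ent_sketch *ent,
				      unsigned long tag, unsigned long pte)
	{
		/* Big-endian: the low address holds tag bits 63:32, so the
		 * lock bit (47) is bit 15 of this 32-bit word, i.e. 0x8000,
		 * matching the TSB_TAG_LOCK definition above.
		 */
		u32 *tag_hi = (u32 *) &ent->tag;
		u32 old;

		for (;;) {
			old = *(volatile u32 *) tag_hi;
			if (old & 0x8000)
				continue;	/* someone else holds the lock */
			if (cmpxchg(tag_hi, old, 0x8000) == old)
				break;		/* lock taken; tag can't match */
		}

		ent->pte = pte;			/* TSB_WRITE: PTE at +0x08 first */
		smp_wmb();			/* TSB_MEMBAR (membar #StoreStore) */
		ent->tag = tag;			/* storing the real tag unlocks */
	}

A TLB-miss handler that races with this sequence simply sees a tag mismatch while the lock bit is set and falls through to the slow path, which is why the lookup side needs no locking at all.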