[SPARC64]: Access TSB with physical addresses when possible.
author:    David S. Miller <davem@sunset.davemloft.net>
           Wed, 1 Feb 2006 23:55:21 +0000 (15:55 -0800)
committer: David S. Miller <davem@sunset.davemloft.net>
           Mon, 20 Mar 2006 09:11:32 +0000 (01:11 -0800)
This way we don't need to lock the TSB into the TLB.
The trick is that every TSB load/store is recorded in a
special instruction patch section.  The default instructions
use virtual addresses, and at boot the patch replaces them
with physical-address loads/stores on cpus that support it.

We can't do this on all chips because only cheetah+ and later
have the physical variant of the atomic quad load.

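For readers unfamiliar with the mechanism: each patched load/store
records its own address plus the physical-ASI encoding of the same
instruction in a table, and a one-time loop at boot rewrites the
instructions when the cpu supports the physical quad load.  Below is a
rough, compile-anywhere C sketch of that loop; the names and the
instruction words are stand-ins, not the kernel's (the real entries are
the 32-bit addr/insn pairs of struct tsb_phys_patch_entry in the tsb.h
hunk, applied by tsb_phys_patch() in mm/init.c).

    #include <stdint.h>
    #include <stdio.h>

    struct phys_patch_entry {
            uint32_t *addr;     /* where the virtual-ASI instruction lives */
            uint32_t  insn;     /* same instruction, physical-ASI encoding */
    };

    static uint32_t text[2] = { 0x11111111, 0x22222222 };  /* stand-in "kernel text" */

    static struct phys_patch_entry patch_table[] = {
            { &text[0], 0xaaaaaaaa },
            { &text[1], 0xbbbbbbbb },
    };

    static void apply_phys_patches(int have_phys_quad_ldd)
    {
            size_t i;

            if (!have_phys_quad_ldd)
                    return;     /* pre-cheetah+: keep the virtual-address forms */

            for (i = 0; i < sizeof(patch_table) / sizeof(patch_table[0]); i++) {
                    *patch_table[i].addr = patch_table[i].insn;
                    /* the real code follows each store with wmb() and a
                     * "flush" of the patched address for I-cache coherency */
            }
    }

    int main(void)
    {
            apply_phys_patches(1);
            printf("%08x %08x\n", (unsigned)text[0], (unsigned)text[1]);
            return 0;
    }
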
Signed-off-by: David S. Miller <davem@davemloft.net>
arch/sparc64/kernel/dtlb_miss.S
arch/sparc64/kernel/itlb_miss.S
arch/sparc64/kernel/ktlb.S
arch/sparc64/kernel/tsb.S
arch/sparc64/kernel/vmlinux.lds.S
arch/sparc64/mm/init.c
arch/sparc64/mm/tsb.c
include/asm-sparc64/mmu.h
include/asm-sparc64/tsb.h

index d0f1565..2ef6f6e 100644 (file)
@@ -4,7 +4,7 @@
        srlx    %g6, 48, %g5                    ! Get context
        brz,pn  %g5, kvmap_dtlb                 ! Context 0 processing
         nop                                    ! Delay slot (fill me)
-       ldda    [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+       TSB_LOAD_QUAD(%g1, %g4)                 ! Load TSB entry
        nop                                     ! Push branch to next I$ line
        cmp     %g4, %g6                        ! Compare TAG
 
index 6b6c8fe..97facce 100644 (file)
@@ -4,7 +4,7 @@
        srlx    %g6, 48, %g5                    ! Get context
        brz,pn  %g5, kvmap_itlb                 ! Context 0 processing
         nop                                    ! Delay slot (fill me)
-       ldda    [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+       TSB_LOAD_QUAD(%g1, %g4)                 ! Load TSB entry
        cmp     %g4, %g6                        ! Compare TAG
        sethi   %hi(_PAGE_EXEC), %g4            ! Setup exec check
 
index 2b5e71b..9b415ab 100644 (file)
@@ -44,14 +44,14 @@ kvmap_itlb_tsb_miss:
 kvmap_itlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
-       TSB_LOCK_TAG(%g1, %g2, %g4)
+       KTSB_LOCK_TAG(%g1, %g2, %g4)
 
        /* Load and check PTE.  */
        ldxa            [%g5] ASI_PHYS_USE_EC, %g5
        brgez,a,pn      %g5, kvmap_itlb_longpath
-        stx            %g0, [%g1]
+        KTSB_STORE(%g1, %g0)
 
-       TSB_WRITE(%g1, %g5, %g6)
+       KTSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
 
@@ -69,9 +69,9 @@ kvmap_itlb_longpath:
 kvmap_itlb_obp:
        OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
 
-       TSB_LOCK_TAG(%g1, %g2, %g4)
+       KTSB_LOCK_TAG(%g1, %g2, %g4)
 
-       TSB_WRITE(%g1, %g5, %g6)
+       KTSB_WRITE(%g1, %g5, %g6)
 
        ba,pt           %xcc, kvmap_itlb_load
         nop
@@ -79,9 +79,9 @@ kvmap_itlb_obp:
 kvmap_dtlb_obp:
        OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
 
-       TSB_LOCK_TAG(%g1, %g2, %g4)
+       KTSB_LOCK_TAG(%g1, %g2, %g4)
 
-       TSB_WRITE(%g1, %g5, %g6)
+       KTSB_WRITE(%g1, %g5, %g6)
 
        ba,pt           %xcc, kvmap_dtlb_load
         nop
@@ -114,14 +114,14 @@ kvmap_linear_patch:
 kvmap_dtlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
-       TSB_LOCK_TAG(%g1, %g2, %g4)
+       KTSB_LOCK_TAG(%g1, %g2, %g4)
 
        /* Load and check PTE.  */
        ldxa            [%g5] ASI_PHYS_USE_EC, %g5
        brgez,a,pn      %g5, kvmap_dtlb_longpath
-        stx            %g0, [%g1]
+        KTSB_STORE(%g1, %g0)
 
-       TSB_WRITE(%g1, %g5, %g6)
+       KTSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
 
index e1dd37f..ff6a79b 100644 (file)
@@ -53,7 +53,7 @@ tsb_reload:
        /* Load and check PTE.  */
        ldxa            [%g5] ASI_PHYS_USE_EC, %g5
        brgez,a,pn      %g5, tsb_do_fault
-        stx            %g0, [%g1]
+        TSB_STORE(%g1, %g0)
 
        /* If it is larger than the base page size, don't
         * bother putting it into the TSB.
@@ -64,7 +64,7 @@ tsb_reload:
        and             %g2, %g4, %g2
        cmp             %g2, %g7
        bne,a,pn        %xcc, tsb_tlb_reload
-        stx            %g0, [%g1]
+        TSB_STORE(%g1, %g0)
 
        TSB_WRITE(%g1, %g5, %g6)
 
@@ -131,13 +131,13 @@ winfix_trampoline:
 
        /* Insert an entry into the TSB.
         *
-        * %o0: TSB entry pointer
+        * %o0: TSB entry pointer (virt or phys address)
         * %o1: tag
         * %o2: pte
         */
        .align  32
-       .globl  tsb_insert
-tsb_insert:
+       .globl  __tsb_insert
+__tsb_insert:
        rdpr    %pstate, %o5
        wrpr    %o5, PSTATE_IE, %pstate
        TSB_LOCK_TAG(%o0, %g2, %g3)
@@ -146,6 +146,31 @@ tsb_insert:
        retl
         nop
 
+       /* Flush the given TSB entry if it has the matching
+        * tag.
+        *
+        * %o0: TSB entry pointer (virt or phys address)
+        * %o1: tag
+        */
+       .align  32
+       .globl  tsb_flush
+tsb_flush:
+       sethi   %hi(TSB_TAG_LOCK_HIGH), %g2
+1:     TSB_LOAD_TAG(%o0, %g1)
+       srlx    %g1, 32, %o3
+       andcc   %o3, %g2, %g0
+       bne,pn  %icc, 1b
+        membar #LoadLoad
+       cmp     %g1, %o1
+       bne,pt  %xcc, 2f
+        clr    %o3
+       TSB_CAS_TAG(%o0, %g1, %o3)
+       cmp     %g1, %o3
+       bne,pn  %xcc, 1b
+        nop
+2:     retl
+        TSB_MEMBAR
+
        /* Reload MMU related context switch state at
         * schedule() time.
         *
index 467d13a..71b943f 100644 (file)
@@ -70,6 +70,10 @@ SECTIONS
   .con_initcall.init : { *(.con_initcall.init) }
   __con_initcall_end = .;
   SECURITY_INIT
+  . = ALIGN(4);
+  __tsb_phys_patch = .;
+  .tsb_phys_patch : { *(.tsb_phys_patch) }
+  __tsb_phys_patch_end = .;
   . = ALIGN(8192); 
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }
index 2c21d85..4893f3e 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/tlb.h>
 #include <asm/spitfire.h>
 #include <asm/sections.h>
+#include <asm/tsb.h>
 
 extern void device_scan(void);
 
@@ -244,6 +245,16 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
                             : "g1", "g7");
 }
 
+static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
+{
+       unsigned long tsb_addr = (unsigned long) ent;
+
+       if (tlb_type == cheetah_plus)
+               tsb_addr = __pa(tsb_addr);
+
+       __tsb_insert(tsb_addr, tag, pte);
+}
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
        struct mm_struct *mm;
@@ -1040,6 +1051,24 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
        return ~0UL;
 }
 
+static void __init tsb_phys_patch(void)
+{
+       struct tsb_phys_patch_entry *p;
+
+       p = &__tsb_phys_patch;
+       while (p < &__tsb_phys_patch_end) {
+               unsigned long addr = p->addr;
+
+               *(unsigned int *) addr = p->insn;
+               wmb();
+               __asm__ __volatile__("flush     %0"
+                                    : /* no outputs */
+                                    : "r" (addr));
+
+               p++;
+       }
+}
+
 /* paging_init() sets up the page tables */
 
 extern void cheetah_ecache_flush_init(void);
@@ -1052,6 +1081,9 @@ void __init paging_init(void)
        unsigned long end_pfn, pages_avail, shift;
        unsigned long real_end, i;
 
+       if (tlb_type == cheetah_plus)
+               tsb_phys_patch();
+
        /* Find available physical memory... */
        read_obp_memory("available", &pavail[0], &pavail_ents);
 
index 1c4e5c2..787533f 100644 (file)
@@ -20,12 +20,9 @@ static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long nentries
        return vaddr & (nentries - 1);
 }
 
-static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+static inline int tag_compare(unsigned long tag, unsigned long vaddr, unsigned long context)
 {
-       if (context == ~0UL)
-               return 1;
-
-       return (entry->tag == ((vaddr >> 22) | (context << 48)));
+       return (tag == ((vaddr >> 22) | (context << 48)));
 }
 
 /* TSB flushes need only occur on the processor initiating the address
@@ -41,7 +38,7 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
                unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];
 
-               if (tag_compare(ent, v, 0)) {
+               if (tag_compare(ent->tag, v, 0)) {
                        ent->tag = 0UL;
                        membar_storeload_storestore();
                }
@@ -52,24 +49,31 @@ void flush_tsb_user(struct mmu_gather *mp)
 {
        struct mm_struct *mm = mp->mm;
        struct tsb *tsb = mm->context.tsb;
-       unsigned long ctx = ~0UL;
        unsigned long nentries = mm->context.tsb_nentries;
+       unsigned long ctx, base;
        int i;
 
-       if (CTX_VALID(mm->context))
-               ctx = CTX_HWBITS(mm->context);
+       if (unlikely(!CTX_VALID(mm->context)))
+               return;
+
+       ctx = CTX_HWBITS(mm->context);
 
+       if (tlb_type == cheetah_plus)
+               base = __pa(tsb);
+       else
+               base = (unsigned long) tsb;
+       
        for (i = 0; i < mp->tlb_nr; i++) {
                unsigned long v = mp->vaddrs[i];
-               struct tsb *ent;
+               unsigned long tag, ent, hash;
 
                v &= ~0x1UL;
 
-               ent = &tsb[tsb_hash(v, nentries)];
-               if (tag_compare(ent, v, ctx)) {
-                       ent->tag = 0UL;
-                       membar_storeload_storestore();
-               }
+               hash = tsb_hash(v, nentries);
+               ent = base + (hash * sizeof(struct tsb));
+               tag = (v >> 22UL) | (ctx << 48UL);
+
+               tsb_flush(ent, tag);
        }
 }
 
@@ -84,6 +88,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
        tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP |
               _PAGE_CV    | _PAGE_P | _PAGE_W);
        tsb_paddr = __pa(mm->context.tsb);
+       BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
 
        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
@@ -144,13 +149,23 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
                BUG();
        };
 
-       tsb_reg |= base;
-       tsb_reg |= (tsb_paddr & (page_sz - 1UL));
-       tte |= (tsb_paddr & ~(page_sz - 1UL));
+       if (tlb_type == cheetah_plus) {
+               /* Physical mapping, no locked TLB entry for TSB.  */
+               tsb_reg |= tsb_paddr;
+
+               mm->context.tsb_reg_val = tsb_reg;
+               mm->context.tsb_map_vaddr = 0;
+               mm->context.tsb_map_pte = 0;
+       } else {
+               tsb_reg |= base;
+               tsb_reg |= (tsb_paddr & (page_sz - 1UL));
+               tte |= (tsb_paddr & ~(page_sz - 1UL));
+
+               mm->context.tsb_reg_val = tsb_reg;
+               mm->context.tsb_map_vaddr = base;
+               mm->context.tsb_map_pte = tte;
+       }
 
-       mm->context.tsb_reg_val = tsb_reg;
-       mm->context.tsb_map_vaddr = base;
-       mm->context.tsb_map_pte = tte;
 }
 
 /* The page tables are locked against modifications while this
@@ -168,13 +183,21 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
        for (i = 0; i < old_nentries; i++) {
                register unsigned long tag asm("o4");
                register unsigned long pte asm("o5");
-               unsigned long v;
-               unsigned int hash;
-
-               __asm__ __volatile__(
-                       "ldda [%2] %3, %0"
-                       : "=r" (tag), "=r" (pte)
-                       : "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
+               unsigned long v, hash;
+
+               if (tlb_type == cheetah_plus) {
+                       __asm__ __volatile__(
+                               "ldda [%2] %3, %0"
+                               : "=r" (tag), "=r" (pte)
+                               : "r" (__pa(&old_tsb[i])),
+                                 "i" (ASI_QUAD_LDD_PHYS));
+               } else {
+                       __asm__ __volatile__(
+                               "ldda [%2] %3, %0"
+                               : "=r" (tag), "=r" (pte)
+                               : "r" (&old_tsb[i]),
+                                 "i" (ASI_NUCLEUS_QUAD_LDD));
+               }
 
                if (!tag || (tag & (1UL << TSB_TAG_LOCK_BIT)))
                        continue;
@@ -198,8 +221,20 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
                v |= (i & (512UL - 1UL)) << 13UL;
 
                hash = tsb_hash(v, new_nentries);
-               new_tsb[hash].tag = tag;
-               new_tsb[hash].pte = pte;
+               if (tlb_type == cheetah_plus) {
+                       __asm__ __volatile__(
+                               "stxa   %0, [%1] %2\n\t"
+                               "stxa   %3, [%4] %2"
+                               : /* no outputs */
+                               : "r" (tag),
+                                 "r" (__pa(&new_tsb[hash].tag)),
+                                 "i" (ASI_PHYS_USE_EC),
+                                 "r" (pte),
+                                 "r" (__pa(&new_tsb[hash].pte)));
+               } else {
+                       new_tsb[hash].tag = tag;
+                       new_tsb[hash].pte = pte;
+               }
        }
 }
 
index 18f98ed..55e6227 100644 (file)
@@ -97,7 +97,8 @@ struct tsb {
        unsigned long pte;
 } __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
 
-extern void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte);
+extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
+extern void tsb_flush(unsigned long ent, unsigned long tag);
 
 typedef struct {
        unsigned long   sparc64_ctx_val;
index f384565..44709cd 100644 (file)
 
 #define TSB_MEMBAR     membar  #StoreStore
 
+/* Some cpus support physical address quad loads.  We want to use
+ * those if possible so we don't need to hard-lock the TSB mapping
+ * into the TLB.  We encode some instruction patching in order to
+ * support this.
+ *
+ * The kernel TSB is locked into the TLB by virtue of being in the
+ * kernel image, so we don't play these games for swapper_tsb access.
+ */
+#ifndef __ASSEMBLY__
+struct tsb_phys_patch_entry {
+       unsigned int    addr;
+       unsigned int    insn;
+};
+extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
+#endif
+#define TSB_LOAD_QUAD(TSB, REG)        \
+661:   ldda            [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       ldda            [TSB] ASI_QUAD_LDD_PHYS, REG; \
+       .previous
+
+#define TSB_LOAD_TAG_HIGH(TSB, REG) \
+661:   lduwa           [TSB] ASI_N, REG; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       lduwa           [TSB] ASI_PHYS_USE_EC, REG; \
+       .previous
+
+#define TSB_LOAD_TAG(TSB, REG) \
+661:   ldxa            [TSB] ASI_N, REG; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       ldxa            [TSB] ASI_PHYS_USE_EC, REG; \
+       .previous
+
+#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
+661:   casa            [TSB] ASI_N, REG1, REG2; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       casa            [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+       .previous
+
+#define TSB_CAS_TAG(TSB, REG1, REG2) \
+661:   casxa           [TSB] ASI_N, REG1, REG2; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       casxa           [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+       .previous
+
+#define TSB_STORE(ADDR, VAL) \
+661:   stxa            VAL, [ADDR] ASI_N; \
+       .section        .tsb_phys_patch, "ax"; \
+       .word           661b; \
+       stxa            VAL, [ADDR] ASI_PHYS_USE_EC; \
+       .previous
+
 #define TSB_LOCK_TAG(TSB, REG1, REG2)  \
+99:    TSB_LOAD_TAG_HIGH(TSB, REG1);   \
+       sethi   %hi(TSB_TAG_LOCK_HIGH), REG2;\
+       andcc   REG1, REG2, %g0;        \
+       bne,pn  %icc, 99b;              \
+        nop;                           \
+       TSB_CAS_TAG_HIGH(TSB, REG1, REG2);      \
+       cmp     REG1, REG2;             \
+       bne,pn  %icc, 99b;              \
+        nop;                           \
+       TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+       add     TSB, 0x8, TSB;   \
+       TSB_STORE(TSB, TTE);     \
+       sub     TSB, 0x8, TSB;   \
+       TSB_MEMBAR;              \
+       TSB_STORE(TSB, TAG);
+
+#define KTSB_LOAD_QUAD(TSB, REG) \
+       ldda            [TSB] ASI_NUCLEUS_QUAD_LDD, REG;
+
+#define KTSB_STORE(ADDR, VAL) \
+       stxa            VAL, [ADDR] ASI_N;
+
+#define KTSB_LOCK_TAG(TSB, REG1, REG2) \
 99:    lduwa   [TSB] ASI_N, REG1;      \
        sethi   %hi(TSB_TAG_LOCK_HIGH), REG2;\
        andcc   REG1, REG2, %g0;        \
         nop;                           \
        TSB_MEMBAR
 
-#define TSB_WRITE(TSB, TTE, TAG)          \
-       stx             TTE, [TSB + 0x08]; \
-       TSB_MEMBAR;                        \
-       stx             TAG, [TSB + 0x00];
+#define KTSB_WRITE(TSB, TTE, TAG) \
+       add     TSB, 0x8, TSB;   \
+       stxa    TTE, [TSB] ASI_N;     \
+       sub     TSB, 0x8, TSB;   \
+       TSB_MEMBAR;              \
+       stxa    TAG, [TSB] ASI_N;
 
        /* Do a kernel page table walk.  Leaves physical PTE pointer in
         * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
        and             REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
        sllx            REG2, 4, REG2; \
        add             REG1, REG2, REG2; \
-       ldda            [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
+       KTSB_LOAD_QUAD(REG2, REG3); \
        cmp             REG3, TAG; \
        be,a,pt         %xcc, OK_LABEL; \
         mov            REG4, REG1;
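
Finally, the ordering encoded by the new TSB_LOCK_TAG/TSB_WRITE pair
(and mirrored by KTSB_LOCK_TAG/KTSB_WRITE) is: take the per-entry lock
by CASing the lock pattern into the tag, store the pte, issue
TSB_MEMBAR (#StoreStore), then store the real tag, which publishes the
entry and drops the lock in a single write.  A hedged C sketch of that
sequence (field layout and lock-bit value are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    struct tsb_ent {
            _Atomic uint64_t tag;           /* high bits double as the lock */
            uint64_t pte;
    };

    #define TAG_LOCKED (1ULL << 47)         /* illustrative lock pattern */

    static void tsb_write_c(struct tsb_ent *ent, uint64_t tag, uint64_t pte)
    {
            uint64_t old;

            do {
                    do {
                            old = atomic_load(&ent->tag);
                    } while (old & TAG_LOCKED);     /* spin while locked */
            } while (!atomic_compare_exchange_strong(&ent->tag, &old,
                                                     TAG_LOCKED));

            ent->pte = pte;                                 /* data first */
            atomic_thread_fence(memory_order_release);      /* TSB_MEMBAR */
            atomic_store(&ent->tag, tag);                   /* publish + unlock */
    }

    int main(void)
    {
            static struct tsb_ent ent;

            tsb_write_c(&ent, 0x123, 0x456);
            return ent.pte == 0x456 ? 0 : 1;
    }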