X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fmm%2Fhugetlbpage.c;h=54131b877da36cb3a827000d802ca76ce82507ad;hb=ac67c6247361b3b8644b34e5301a46d5069c1373;hp=0ea0994ed974e0185bad810cc1757cc4f20f5063;hpb=b6ec995a21a9428aef620b5adf46d047a18d88b8;p=powerpc.git diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ea0994ed9..54131b877d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pu = pud_offset(pg, addr); if (!pud_none(*pu)) { pm = pmd_offset(pu, addr); +#ifdef CONFIG_PPC_64K_PAGES + /* Currently, we use the normal PTE offset within full + * size PTE pages, thus our huge PTEs are scattered in + * the PTE page and we do waste some. We may change + * that in the future, but the current mecanism keeps + * things much simpler + */ + if (!pmd_none(*pm)) { + /* Note: pte_offset_* are all equivalent on + * ppc64 as we don't have HIGHMEM + */ + pt = pte_offset_kernel(pm, addr); + return pt; + } +#else /* CONFIG_PPC_64K_PAGES */ + /* On 4k pages, we put huge PTEs in the PMD page */ pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); return pt; +#endif /* CONFIG_PPC_64K_PAGES */ } } @@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) if (pu) { pm = pmd_alloc(mm, pu, addr); if (pm) { +#ifdef CONFIG_PPC_64K_PAGES + /* See comment in huge_pte_offset. Note that if we ever + * want to put the page size in the PMD, we would have + * to open code our own pte_alloc* function in order + * to populate and set the size atomically + */ + pt = pte_alloc_map(mm, pm, addr); +#else /* CONFIG_PPC_64K_PAGES */ pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); +#endif /* CONFIG_PPC_64K_PAGES */ return pt; } } @@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return NULL; } -#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - int i; - if (pte_present(*ptep)) { - pte_clear(mm, addr, ptep); + /* We open-code pte_clear because we need to pass the right + * argument to hpte_update (huge / !huge) + */ + unsigned long old = pte_update(ptep, ~0UL); + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1); flush_tlb_pending(); } - - for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { - *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - ptep++; - } + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long old = pte_update(ptep, ~0UL); - int i; if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); - - for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) - ptep[i] = __pte(0); + hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1); + *ptep = __pte(0); return __pte(old); } @@ -132,43 +148,63 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } +struct slb_flush_info { + struct mm_struct *mm; + u16 newareas; +}; + static void flush_low_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i; - asm volatile("isync" : : : "memory"); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); + + if (current->active_mm != fi->mm) + return; - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; asm volatile("slbie %0" : : "r" ((i << SID_SHIFT) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } static void flush_high_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i, j; - asm volatile("isync" : : : "memory"); - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); + + if (current->active_mm != fi->mm) + return; + + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) asm volatile("slbie %0" :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); + + (j << SID_SHIFT)) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } @@ -196,6 +232,12 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) BUG_ON(area >= NUM_HIGH_AREAS); + /* Hack, so that each addresses is controlled by exactly one + * of the high or low area bitmaps, the first high area starts + * at 4GB, not 0 */ + if (start == 0) + start = 0x100000000UL; + /* Check no VMAs are in the region */ vma = find_vma(mm, start); if (vma && (vma->vm_start < end)) @@ -207,6 +249,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; + struct slb_flush_info fi; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); @@ -222,19 +265,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) mm->context.low_htlb_areas |= newareas; - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_low_segments, &fi, 0, 1); return 0; } static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) { + struct slb_flush_info fi; unsigned long i; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); @@ -258,22 +302,25 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_high_segments, &fi, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - int err; + int err = 0; if ( (addr+len) < addr ) return -EINVAL; - if ((addr + len) < 0x100000000UL) + if (addr < 0x100000000UL) err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - else + if ((addr + len) > 0x100000000UL) err = open_high_hpage_areas(current->mm, HTLB_AREA_MASK(addr, len)); if (err) { @@ -563,6 +610,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, int lastshift; u16 areamask, curareas; + if (HPAGE_SHIFT == 0) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; @@ -615,23 +664,47 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, + pte_t pte, int trap) +{ + struct page *page; + int i; + + if (!pfn_valid(pte_pfn(pte))) + return rflags; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + __flush_dcache_icache(page_address(page+i)); + set_bit(PG_arch_1, &page->flags); + } else { + rflags |= HPTE_R_N; + } + } + return rflags; +} + int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local) + unsigned long ea, unsigned long vsid, int local, + unsigned long trap) { pte_t *ptep; - unsigned long va, vpn; - pte_t old_pte, new_pte; - unsigned long rflags, prpn; + unsigned long old_pte, new_pte; + unsigned long va, rflags, pa; long slot; int err = 1; - spin_lock(&mm->page_table_lock); - ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> HPAGE_SHIFT; /* * If no pte found or not present, send the problem up to @@ -640,8 +713,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, if (unlikely(!ptep || pte_none(*ptep))) goto out; -/* BUG_ON(pte_bad(*ptep)); */ - /* * Check the user's access rights to the page. If access should be * prevented then send the problem up to do_page_fault. @@ -661,58 +732,70 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, */ - old_pte = *ptep; - new_pte = old_pte; - - rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + do { + old_pte = pte_val(*ptep); + if (old_pte & _PAGE_BUSY) + goto out; + new_pte = old_pte | _PAGE_BUSY | + _PAGE_ACCESSED | _PAGE_HASHPTE; + } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + + rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ - rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + /* No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case */ + rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), + trap); /* Check if pte already has an hpte (case 2) */ - if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { + if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(vpn, 1); - if (pte_val(old_pte) & _PAGE_SECONDARY) + hash = hpt_hash(va, HPAGE_SHIFT); + if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) - pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + local) == -1) + old_pte &= ~_PAGE_HPTEFLAGS; } - if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(vpn, 1); + if (likely(!(old_pte & _PAGE_HASHPTE))) { + unsigned long hash = hpt_hash(va, HPAGE_SHIFT); unsigned long hpte_group; - prpn = pte_pfn(old_pte); + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - /* Update the linux pte with the HPTE slot */ - pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; - pte_val(new_pte) |= _PAGE_HASHPTE; + /* clear HPTE slot informations in new PTE */ + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; /* Add in WIMG bits */ /* XXX We should store these in the pte */ + /* --BenH: I think they are ... */ rflags |= _PAGE_COHERENT; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE, rflags); + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, + mmu_huge_psize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { - pte_val(new_pte) |= _PAGE_SECONDARY; + new_pte |= _PAGE_F_SECOND; hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE | + slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - rflags); + mmu_huge_psize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -726,20 +809,16 @@ repeat: if (unlikely(slot == -2)) panic("hash_huge_page: pte_insert failed\n"); - pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; - - /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock - */ - *ptep = new_pte; + new_pte |= (slot << 12) & _PAGE_F_GIX; } + /* + * No need to use ldarx/stdcx here + */ + *ptep = __pte(new_pte & ~_PAGE_BUSY); + err = 0; out: - spin_unlock(&mm->page_table_lock); - return err; }