2 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
3 * {mikejc|engebret}@us.ibm.com
5 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
7 * SMP scalability work:
8 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
13 * PowerPC Hashed Page Table functions
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
21 #include <linux/config.h>
22 #include <linux/spinlock.h>
23 #include <linux/errno.h>
24 #include <linux/sched.h>
25 #include <linux/proc_fs.h>
26 #include <linux/stat.h>
27 #include <linux/sysctl.h>
28 #include <linux/ctype.h>
29 #include <linux/cache.h>
31 #include <asm/ppcdebug.h>
32 #include <asm/processor.h>
33 #include <asm/pgtable.h>
35 #include <asm/mmu_context.h>
37 #include <asm/types.h>
38 #include <asm/uaccess.h>
41 #include <asm/machdep.h>
43 #include <asm/abs_addr.h>
46 #include <asm/hvcall.h>
47 #include <asm/iSeries/LparData.h>
48 #include <asm/iSeries/HvCallHpt.h>
51 * Note: pte --> Linux PTE
52 * HPTE --> PowerPC Hashed Page Table Entry
55 * htab_initialize is called with the MMU off (of course), but
56 * the kernel has been copied down to zero so it can directly
57 * reference global data. At this point it is very difficult
58 * to print debug info.
/* Global descriptor of the hardware hash page table (HPT): base pointer,
 * PTEG count, hash mask, and lock shift — filled in by htab_initialize(). */
62 HTAB htab_data = {NULL, 0, 0, 0, 0};
/* _SDR1 image written during boot (pSeries); klimit marks end of kernel. */
64 extern unsigned long _SDR1;
65 extern unsigned long klimit;
/* Forward declarations for the boot-time PTE builder and the LPAR
 * hypervisor-call wrappers / removal helpers defined later in this file. */
67 void make_pte(HPTE *htab, unsigned long va, unsigned long pa,
68 int mode, unsigned long hash_mask, int large);
69 long plpar_pte_enter(unsigned long flags,
71 unsigned long new_pteh, unsigned long new_ptel,
72 unsigned long *old_pteh_ret, unsigned long *old_ptel_ret);
73 static long hpte_remove(unsigned long hpte_group);
74 static long rpa_lpar_hpte_remove(unsigned long hpte_group);
75 static long iSeries_hpte_remove(unsigned long hpte_group);
76 inline unsigned long get_lock_slot(unsigned long vpn);
/* tlbie must be serialized on these platforms — one global lock each. */
78 static spinlock_t pSeries_tlbie_lock = SPIN_LOCK_UNLOCKED;
79 static spinlock_t pSeries_lpar_tlbie_lock = SPIN_LOCK_UNLOCKED;
/* Hashed-PTE lock array: 128 locks on SMP, a single lock otherwise.
 * NOTE(review): the two definitions appear to be alternatives selected by
 * a preprocessor conditional not visible in this view — confirm. */
83 hash_table_lock_t hash_table_lock[128] __cacheline_aligned_in_smp = { [0 ... 31] = {SPIN_LOCK_UNLOCKED}};
85 hash_table_lock_t hash_table_lock[1] __cacheline_aligned_in_smp = { [0] = {SPIN_LOCK_UNLOCKED}};
/* NOTE(review): purpose of this variable is not evident from this view. */
94 volatile unsigned long x = 1;
/*
 * Bolt a linear range [start, end) of kernel addresses into the hash table
 * at boot.  mode carries the HPTE protection/attribute bits; large selects
 * 16M pages (the loop step is presumably page-size dependent — the
 * declaration of 'step' is not visible in this view).
 */
100 create_pte_mapping(unsigned long start, unsigned long end,
101 unsigned long mode, unsigned long mask, int large)
/* The MMU is off here, so convert the htab pointer to its absolute form. */
104 HPTE *htab = (HPTE *)__v2a(htab_data.htab);
112 for (addr = start; addr < end; addr += step) {
/* Build the 64-bit VA from the kernel VSID and the 28-bit segment offset. */
113 unsigned long vsid = get_kernel_vsid(addr);
114 unsigned long va = (vsid << 28) | (addr & 0xfffffff);
115 make_pte(htab, va, (unsigned long)__v2a(addr),
/*
 * Size, allocate and populate the hash page table at boot.  Runs with the
 * MMU off (see file header); on pSeries the HPT is allocated from the LMB
 * allocator, on LPAR platforms the hypervisor owns the HPT.
 */
121 htab_initialize(void)
123 unsigned long table, htab_size_bytes;
124 unsigned long pteg_count;
125 unsigned long mode_rw, mask, lock_shift;
128 /* Can't really do the call below since it calls the normal RTAS
129 * entry point and we're still relocate off at the moment.
130 * Temporarily disabling until it can call through the relocate off
131 * RTAS entry point. -Peter
133 ppc64_boot_msg(0x05, "htab init");
136 * Calculate the required size of the htab. We want the number of
137 * PTEGs to equal one half the number of real pages.
139 htab_size_bytes = 1UL << naca->pftSize;
/* Each PTEG is 128 bytes, hence the shift by 7. */
140 pteg_count = htab_size_bytes >> 7;
142 /* For debug, make the HTAB 1/8 as big as it normally would be. */
143 ifppcdebug(PPCDBG_HTABSIZE) {
145 htab_size_bytes = pteg_count << 7;
148 htab_data.htab_num_ptegs = pteg_count;
149 htab_data.htab_hash_mask = pteg_count - 1;
152 * Calculate the number of bits to shift the pteg selector such that we
153 * use the high order 8 bits to select a page table lock.
155 asm ("cntlzd %0,%1" : "=r" (lock_shift) :
156 "r" (htab_data.htab_hash_mask));
157 htab_data.htab_lock_shift = (64 - lock_shift) - 8;
159 if(systemcfg->platform == PLATFORM_PSERIES) {
160 /* Find storage for the HPT. Must be contiguous in
161 * the absolute address space.
163 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
165 ppc64_terminate_msg(0x20, "hpt space");
168 htab_data.htab = (HPTE *)__a2v(table);
170 /* htab absolute addr + encoded htabsize */
171 _SDR1 = table + __ilog2(pteg_count) - 11;
173 /* Initialize the HPT with no entries */
174 memset((void *)table, 0, htab_size_bytes);
176 /* Using a hypervisor which owns the htab */
177 htab_data.htab = NULL;
181 mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
184 /* XXX we currently map kernel text rw, should fix this */
/* Use large pages for the bulk of memory when supported; the first 256MB
 * is mapped with small pages in that case. */
185 if ((systemcfg->platform & PLATFORM_PSERIES) &&
186 cpu_has_largepage() && (systemcfg->physicalMemorySize > 256*MB)) {
187 create_pte_mapping((unsigned long)KERNELBASE,
188 KERNELBASE + 256*MB, mode_rw, mask, 0);
189 create_pte_mapping((unsigned long)KERNELBASE + 256*MB,
190 KERNELBASE + (systemcfg->physicalMemorySize),
193 create_pte_mapping((unsigned long)KERNELBASE,
194 KERNELBASE+(systemcfg->physicalMemorySize),
198 /* Can't really do the call below since it calls the normal RTAS
199 * entry point and we're still relocate off at the moment.
200 * Temporarily disabling until it can call through the relocate off
201 * RTAS entry point. -Peter
203 ppc64_boot_msg(0x06, "htab done");
210 * Create a pte. Used during initialization only.
211 * We assume the PTE will fit in the primary PTEG.
/*
 * Build and install a single bolted HPTE during boot, dispatching on the
 * platform: direct store for pSeries, H_ENTER hcall for pSeries LPAR, and
 * HvCallHpt for iSeries.  Assumes the entry fits in the primary PTEG.
 */
213 void make_pte(HPTE *htab, unsigned long va, unsigned long pa,
214 int mode, unsigned long hash_mask, int large)
216 HPTE *hptep, local_hpte, rhpte;
217 unsigned long hash, vpn, flags, lpar_rc;
218 unsigned long i, dummy1, dummy2;
222 vpn = va >> LARGE_PAGE_SHIFT;
224 vpn = va >> PAGE_SHIFT;
226 hash = hpt_hash(vpn, large);
/* Assemble the HPTE in a local copy before publishing it. */
228 local_hpte.dw1.dword1 = pa | mode;
229 local_hpte.dw0.dword0 = 0;
230 local_hpte.dw0.dw0.avpn = va >> 23;
231 local_hpte.dw0.dw0.bolted = 1; /* bolted */
233 local_hpte.dw0.dw0.l = 1; /* large page */
234 local_hpte.dw0.dw0.avpn &= ~0x1UL;
236 local_hpte.dw0.dw0.v = 1;
238 if (systemcfg->platform == PLATFORM_PSERIES) {
/* Native: scan the primary group for a free (invalid) slot. */
239 hptep = htab + ((hash & hash_mask)*HPTES_PER_GROUP);
241 for (i = 0; i < 8; ++i, ++hptep) {
242 if (hptep->dw0.dw0.v == 0) { /* !valid */
247 } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
248 slot = ((hash & hash_mask)*HPTES_PER_GROUP);
250 /* Set CEC cookie to 0 */
252 /* I-cache Invalidate = 0 */
253 /* I-cache synchronize = 0 */
254 /* Exact = 0 - modify any entry in group */
257 lpar_rc = plpar_pte_enter(flags, slot, local_hpte.dw0.dword0,
258 local_hpte.dw1.dword1,
261 } else if (systemcfg->platform == PLATFORM_ISERIES_LPAR) {
262 slot = HvCallHpt_findValid(&rhpte, vpn);
264 /* Must find space in primary group */
265 panic("hash_page: hpte already exists\n");
267 HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte );
271 /* We should _never_ get here and too early to call xmon. */
272 ppc64_terminate_msg(0x22, "hpte platform");
277 * find_linux_pte returns the address of a linux pte for a given
278 * effective address and directory. If not found, it returns zero.
/*
 * Walk the Linux page tables (pgd -> pmd -> pte) for effective address ea.
 * Returns the pte pointer, or zero when no present pte exists (per the
 * comment above this function in the original file).
 */
280 pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
287 pg = pgdir + pgd_index(ea);
288 if (!pgd_none(*pg)) {
289 pm = pmd_offset(pg, ea);
290 if (!pmd_none(*pm)) {
291 pt = pte_offset(pm, ea);
293 if (!pte_present(pte))
/*
 * Derive the HPTE PP (page protection) bits from a Linux pte.  Software
 * DIRTY-bit management: a user page that is not both RW and DIRTY gets a
 * read-only PP encoding so the first store faults and sets DIRTY.
 */
301 static inline unsigned long computeHptePP(unsigned long pte)
303 return (pte & _PAGE_USER) |
304 (((pte & _PAGE_USER) >> 1) &
305 ((~((pte >> 2) & /* _PAGE_RW */
306 (pte >> 7))) & /* _PAGE_DIRTY */
311 * Handle a fault by adding an HPTE. If the address can't be determined
312 * to be valid via Linux page tables, return 1. If handled return 0
/*
 * Core of hash-fault handling: given a present Linux pte, either update the
 * PP bits of an existing HPTE (software DIRTY management) or insert a new
 * HPTE and record its slot in the Linux pte.  Returns 0 when handled,
 * 1 to push the fault up to do_page_fault (access check failure).
 * Called with the mm's page_table_lock held (see hash_page below).
 */
314 int __hash_page(unsigned long ea, unsigned long access,
315 unsigned long vsid, pte_t *ptep)
317 unsigned long va, vpn;
318 unsigned long newpp, prpn;
319 unsigned long hpteflags, lock_slot;
321 pte_t old_pte, new_pte;
323 /* Search the Linux page table for a match with va */
324 va = (vsid << 28) | (ea & 0x0fffffff);
325 vpn = va >> PAGE_SHIFT;
326 lock_slot = get_lock_slot(vpn);
328 /* Acquire the hash table lock to guarantee that the linux
329 * pte we fetch will not change
331 spin_lock(&hash_table_lock[lock_slot].lock);
334 * Check the user's access rights to the page. If access should be
335 * prevented then send the problem up to do_page_fault.
337 access |= _PAGE_PRESENT;
338 if (unlikely(access & ~(pte_val(*ptep)))) {
339 spin_unlock(&hash_table_lock[lock_slot].lock);
344 * We have found a pte (which was present).
345 * The spinlocks prevent this status from changing
346 * The hash_table_lock prevents the _PAGE_HASHPTE status
347 * from changing (RPN, DIRTY and ACCESSED too)
348 * The page_table_lock prevents the pte from being
349 * invalidated or modified
353 * At this point, we have a pte (old_pte) which can be used to build
354 * or update an HPTE. There are 2 cases:
356 * 1. There is a valid (present) pte with no associated HPTE (this is
357 * the most common case)
358 * 2. There is a valid (present) pte with an associated HPTE. The
359 * current values of the pp bits in the HPTE prevent access
360 * because we are doing software DIRTY bit management and the
361 * page is currently not DIRTY.
367 /* If the attempted access was a store */
368 if (access & _PAGE_RW)
369 pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
371 pte_val(new_pte) |= _PAGE_ACCESSED;
373 newpp = computeHptePP(pte_val(new_pte));
375 /* Check if pte already has an hpte (case 2) */
376 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
377 /* There MIGHT be an HPTE for this pte */
378 unsigned long hash, slot, secondary;
380 /* XXX fix large pte flag */
381 hash = hpt_hash(vpn, 0);
/* _PAGE_SECONDARY and _PAGE_GROUP_IX in the pte locate the old HPTE. */
382 secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
385 slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
386 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
388 /* XXX fix large pte flag */
/* -1 means the HPTE was stolen; clear the hash flags and reinsert. */
389 if (ppc_md.hpte_updatepp(slot, secondary,
390 newpp, va, 0) == -1) {
391 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
393 if (!pte_same(old_pte, new_pte)) {
399 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
400 /* Update the linux pte with the HPTE slot */
401 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
402 pte_val(new_pte) |= _PAGE_HASHPTE;
403 prpn = pte_val(old_pte) >> PTE_SHIFT;
405 /* copy appropriate flags from linux pte */
406 hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
408 slot = ppc_md.hpte_insert(vpn, prpn, hpteflags, 0, 0);
/* Record hash-group/secondary of the new slot back in the Linux pte. */
410 pte_val(new_pte) |= ((slot<<12) &
411 (_PAGE_GROUP_IX | _PAGE_SECONDARY));
416 spin_unlock(&hash_table_lock[lock_slot].lock);
422 * Handle a fault by adding an HPTE. If the address can't be determined
423 * to be valid via Linux page tables, return 1. If handled return 0
/*
 * Top-level hash-fault entry: validate the effective address, derive the
 * VSID by region, locate the Linux pte under page_table_lock, and hand off
 * to __hash_page.  Returns 1 to send the fault up to do_page_fault,
 * otherwise the result of __hash_page.
 */
425 int hash_page(unsigned long ea, unsigned long access)
429 struct mm_struct *mm;
433 /* Check for invalid addresses. */
434 if (!IS_VALID_EA(ea)) return 1;
436 switch (REGION_ID(ea)) {
/* User region: need a valid mm and a per-context VSID. */
439 if (mm == NULL) return 1;
440 vsid = get_vsid(mm->context, ea);
444 vsid = get_kernel_vsid(ea);
446 case VMALLOC_REGION_ID:
448 vsid = get_kernel_vsid(ea);
452 * Should only be hit if there is an access to MMIO space
453 * which is protected by EEH.
454 * Send the problem up to do_page_fault
456 case KERNEL_REGION_ID:
458 * Should never get here - entire 0xC0... region is bolted.
459 * Send the problem up to do_page_fault
463 * Send the problem up to do_page_fault
470 if (pgdir == NULL) return 1;
473 * Lock the Linux page table to prevent mmap and kswapd
474 * from modifying entries while we search and update
476 spin_lock(&mm->page_table_lock);
478 ptep = find_linux_pte(pgdir, ea);
480 * If no pte found or not present, send the problem up to
483 if (ptep && pte_present(*ptep)) {
484 ret = __hash_page(ea, access, vsid, ptep);
486 /* If no pte, send the problem up to do_page_fault */
490 spin_unlock(&mm->page_table_lock);
/*
 * Invalidate the HPTE (if any) backing a Linux pte: clear the hash flags in
 * the pte under the hashed lock, then call the platform hpte_invalidate for
 * the slot recorded in _PAGE_SECONDARY/_PAGE_GROUP_IX.
 */
495 void flush_hash_page(unsigned long context, unsigned long ea, pte_t *ptep)
497 unsigned long vsid, vpn, va, hash, secondary, slot, flags, lock_slot;
498 unsigned long large = 0, local = 0;
501 if ((ea >= USER_START) && (ea <= USER_END))
502 vsid = get_vsid(context, ea);
504 vsid = get_kernel_vsid(ea);
506 va = (vsid << 28) | (ea & 0x0fffffff);
508 vpn = va >> LARGE_PAGE_SHIFT;
510 vpn = va >> PAGE_SHIFT;
512 lock_slot = get_lock_slot(vpn);
513 hash = hpt_hash(vpn, large);
514 spin_lock_irqsave(&hash_table_lock[lock_slot].lock, flags);
/* Atomically strip the HPTE flags from the pte and keep the old value. */
517 pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
518 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
519 if (secondary) hash = ~hash;
520 slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
521 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
523 if (pte_val(pte) & _PAGE_HASHPTE) {
524 ppc_md.hpte_invalidate(slot, secondary, va, large, local);
527 spin_unlock_irqrestore(&hash_table_lock[lock_slot].lock, flags);
/*
 * H_ENTER hypervisor call wrapper: insert an HPTE (new_pteh/new_ptel) and
 * return any displaced entry through old_pteh_ret/old_ptel_ret.
 */
530 long plpar_pte_enter(unsigned long flags,
532 unsigned long new_pteh, unsigned long new_ptel,
533 unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
535 unsigned long dummy, ret;
536 ret = plpar_hcall(H_ENTER, flags, ptex, new_pteh, new_ptel,
537 old_pteh_ret, old_ptel_ret, &dummy);
/*
 * H_REMOVE hypervisor call wrapper: remove the HPTE at ptex (optionally
 * conditioned on avpn) and return the old entry contents.
 */
541 long plpar_pte_remove(unsigned long flags,
544 unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
547 return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0,
548 old_pteh_ret, old_ptel_ret, &dummy);
/*
 * H_READ hypervisor call wrapper: read both dwords of the HPTE at ptex.
 */
551 long plpar_pte_read(unsigned long flags,
553 unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
556 return plpar_hcall(H_READ, flags, ptex, 0, 0,
557 old_pteh_ret, old_ptel_ret, &dummy);
/*
 * H_PROTECT hypervisor call wrapper: update the protection bits of the
 * HPTE at ptex (no return values beyond the hcall status).
 */
560 long plpar_pte_protect(unsigned long flags,
564 return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
/*
 * Atomically merge new PP bits into dword1 of an HPTE in place.
 * NOTE(review): the asm body (presumably an ldarx/stdcx. loop) is not
 * visible in this view — confirm against the full file.
 */
567 static __inline__ void set_pp_bit(unsigned long pp, HPTE *addr)
570 unsigned long *p = &addr->dw1.dword1;
572 __asm__ __volatile__(
577 : "=&r" (old), "=m" (*p)
578 : "r" (pp), "r" (p), "m" (*p)
583 * Calculate which hash_table_lock to use, based on the pteg being used.
585 * Given a VPN, use the high order 8 bits to select one of 2^7 locks. The
586 * highest order bit is used to indicate primary vs. secondary group. If the
587 * secondary is selected, complement the lock select bits. This results in
588 * both the primary and secondary groups being covered under the same lock.
/*
 * Map a VPN to a hash_table_lock index.  The top bit of the 8-bit selector
 * distinguishes primary vs. secondary group; complementing it folds both
 * groups onto the same lock (see the comment block above this function).
 */
590 inline unsigned long get_lock_slot(unsigned long vpn)
592 unsigned long lock_slot;
594 lock_slot = (hpt_hash(vpn,0) >> htab_data.htab_lock_shift) & 0xff;
595 if(lock_slot & 0x80) lock_slot = (~lock_slot) & 0x7f;
603 * Functions used to retrieve word 0 of a given page table entry.
605 * Input : slot : PTE index within the page table of the entry to retrieve
606 * Output: Contents of word 0 of the specified entry
/*
 * Read word 0 of the HPTE at 'slot' via H_READ; panics on hcall failure.
 */
608 static unsigned long rpa_lpar_hpte_getword0(unsigned long slot)
610 unsigned long dword0;
611 unsigned long lpar_rc;
612 unsigned long dummy_word1;
615 /* Read 1 pte at a time */
616 /* Do not need RPN to logical page translation */
617 /* No cross CEC PFT access */
620 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
622 if (lpar_rc != H_Success)
623 panic("Error on pte read in get_hpte0 rc = %lx\n", lpar_rc);
/*
 * Read word 0 of the HPTE at 'slot' via the iSeries HvCallHpt interface.
 */
628 unsigned long iSeries_hpte_getword0(unsigned long slot)
630 unsigned long dword0;
633 HvCallHpt_get(&hpte, slot);
634 dword0 = hpte.dw0.dword0;
640 * Functions used to find the PTE for a particular virtual address.
641 * Only used during boot when bolting pages.
643 * Input : vpn : virtual page number
644 * Output: PTE index within the page table of the entry
/*
 * Native search for the HPTE matching 'vpn': scan primary then secondary
 * group (j loop) comparing AVPN + valid bit.  Boot-time only (bolting).
 */
647 static long hpte_find(unsigned long vpn)
655 hash = hpt_hash(vpn, 0);
657 for (j = 0; j < 2; j++) {
658 slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
659 for (i = 0; i < HPTES_PER_GROUP; i++) {
660 hptep = htab_data.htab + slot;
661 dw0 = hptep->dw0.dw0;
663 if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
/*
 * LPAR variant of hpte_find: same primary/secondary group scan, but each
 * word 0 is fetched through rpa_lpar_hpte_getword0 (H_READ).
 */
678 static long rpa_lpar_hpte_find(unsigned long vpn)
684 unsigned long dword0;
689 hash = hpt_hash(vpn, 0);
691 for (j = 0; j < 2; j++) {
692 slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
693 for (i = 0; i < HPTES_PER_GROUP; i++) {
694 hpte_dw0.dword0 = rpa_lpar_hpte_getword0(slot);
697 if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
/*
 * iSeries variant of hpte_find: delegate the search to the hypervisor and
 * decode its slot encoding (documented in the comment below).
 */
712 static long iSeries_hpte_find(unsigned long vpn)
718 * The HvCallHpt_findValid interface is as follows:
719 * 0xffffffffffffffff : No entry found.
720 * 0x00000000xxxxxxxx : Entry found in primary group, slot x
721 * 0x80000000xxxxxxxx : Entry found in secondary group, slot x
723 slot = HvCallHpt_findValid(&hpte, vpn);
724 if (hpte.dw0.dw0.v) {
/* Strip the secondary-group flag bit from the returned slot. */
726 slot &= 0x7fffffffffffffff;
737 * Functions used to invalidate a page table entry from the page table
740 * Input : slot : PTE index within the page table of the entry to invalidated
741 * va : Virtual address of the entry being invalidated
742 * large : 1 = large page (16M)
743 * local : 1 = Use tlbiel to only invalidate the local tlb
/*
 * Native HPTE invalidate: zero dword0 of the slot (skipping bolted
 * entries), then flush the TLB under pSeries_tlbie_lock.
 */
745 static void hpte_invalidate(unsigned long slot,
746 unsigned long secondary,
748 int large, int local)
750 HPTE *hptep = htab_data.htab + slot;
752 unsigned long vpn, avpn;
756 vpn = va >> LARGE_PAGE_SHIFT;
758 vpn = va >> PAGE_SHIFT;
762 dw0 = hptep->dw0.dw0;
765 * Do not remove bolted entries. Alternatively, we could check
766 * the AVPN, hash group, and valid bits. By doing it this way,
767 * it is common with the pSeries LPAR optimal path.
769 if (dw0.bolted) return;
771 /* Invalidate the hpte. */
772 hptep->dw0.dword0 = 0;
774 /* Invalidate the tlb */
775 spin_lock_irqsave(&pSeries_tlbie_lock, flags);
777 spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
/*
 * LPAR HPTE invalidate: H_REMOVE with H_ANDCOND and the bolted bit
 * (0x1UL << 4) so the hypervisor refuses to remove bolted entries.
 */
780 static void rpa_lpar_hpte_invalidate(unsigned long slot,
781 unsigned long secondary,
783 int large, int local)
785 unsigned long lpar_rc;
786 unsigned long dummy1, dummy2;
789 * Don't remove a bolted entry. This case can occur when we bolt
790 * pages dynamically after initial boot.
792 lpar_rc = plpar_pte_remove(H_ANDCOND, slot, (0x1UL << 4),
795 if (lpar_rc != H_Success)
796 panic("Bad return code from invalidate rc = %lx\n", lpar_rc);
/*
 * iSeries HPTE invalidate: re-read word 0 and only invalidate when AVPN,
 * valid bit and hash-group all still match the caller's expectation.
 */
799 static void iSeries_hpte_invalidate(unsigned long slot,
800 unsigned long secondary,
802 int large, int local)
805 unsigned long vpn, avpn;
808 vpn = va >> LARGE_PAGE_SHIFT;
810 vpn = va >> PAGE_SHIFT;
814 lhpte.dw0.dword0 = iSeries_hpte_getword0(slot);
816 if ((lhpte.dw0.dw0.avpn == avpn) &&
818 (lhpte.dw0.dw0.h == secondary)) {
819 HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
824 * Functions used to update page protection bits.
826 * Input : slot : PTE index within the page table of the entry to update
827 * newpp : new page protection bits
828 * va : Virtual address of the entry being updated
829 * large : 1 = large page (16M)
830 * Output: 0 on success, -1 on failure
/*
 * Native update of the PP bits for the HPTE at 'slot', if it still matches
 * (AVPN/valid/hash-group).  Sequence: drop valid bit, tlbie, rewrite PP in
 * dword1, eieio, re-set valid, ptesync — so no CPU can use a half-updated
 * entry.  Returns 0 on success, -1 if the entry no longer matches (per the
 * contract in the comment block above this function).
 */
832 static long hpte_updatepp(unsigned long slot,
833 unsigned long secondary,
835 unsigned long va, int large)
837 HPTE *hptep = htab_data.htab + slot;
840 unsigned long vpn, avpn;
844 vpn = va >> LARGE_PAGE_SHIFT;
846 vpn = va >> PAGE_SHIFT;
850 dw0 = hptep->dw0.dw0;
851 if ((dw0.avpn == avpn) &&
852 (dw0.v) && (dw0.h == secondary)) {
853 /* Turn off valid bit in HPTE */
855 hptep->dw0.dw0 = dw0;
857 /* Ensure it is out of the tlb too */
858 spin_lock_irqsave(&pSeries_tlbie_lock, flags);
860 spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
862 /* Insert the new pp bits into the HPTE */
863 dw1 = hptep->dw1.dw1;
865 hptep->dw1.dw1 = dw1;
867 /* Ensure it is visible before validating */
868 __asm__ __volatile__ ("eieio" : : : "memory");
870 /* Turn the valid bit back on in HPTE */
872 hptep->dw0.dw0 = dw0;
874 __asm__ __volatile__ ("ptesync" : : : "memory");
/*
 * LPAR updatepp: read the HPTE via H_READ, and if AVPN/hash-group still
 * match, apply the low 3 bits of newpp with H_PROTECT.
 */
882 static long rpa_lpar_hpte_updatepp(unsigned long slot,
883 unsigned long secondary,
885 unsigned long va, int large)
887 unsigned long lpar_rc;
888 unsigned long flags = (newpp & 7);
889 unsigned long avpn = va >> 23;
892 lpar_rc = plpar_pte_read(0, slot, &hpte.dw0.dword0, &hpte.dw1.dword1);
894 if ((hpte.dw0.dw0.avpn == avpn) &&
896 (hpte.dw0.dw0.h == secondary)) {
897 lpar_rc = plpar_pte_protect(flags, slot, 0);
898 if (lpar_rc != H_Success)
899 panic("bad return code from pte protect rc = %lx\n",
/*
 * iSeries updatepp: fetch the HPTE, verify it still matches, then set the
 * new PP bits through HvCallHpt_setPp.
 */
907 static long iSeries_hpte_updatepp(unsigned long slot,
908 unsigned long secondary,
910 unsigned long va, int large)
912 unsigned long vpn, avpn;
916 vpn = va >> LARGE_PAGE_SHIFT;
918 vpn = va >> PAGE_SHIFT;
922 HvCallHpt_get(&hpte, slot);
923 if ((hpte.dw0.dw0.avpn == avpn) &&
925 (hpte.dw0.dw0.h == secondary)) {
926 HvCallHpt_setPp(slot, newpp);
933 * Functions used to update the page protection bits. Intended to be used
934 * to create guard pages for kernel data structures on pages which are bolted
935 * in the HPT. Assumes pages being operated on will not be stolen.
936 * Does not work on large pages. No need to lock here because we are the
939 * Input : newpp : page protection flags
940 * ea : effective kernel address to bolt.
/*
 * Native update of PP bits for a bolted kernel page: find the HPTE for the
 * kernel ea, merge the new PP bits atomically, then flush the TLB entry.
 * Panics if the page is not found (bolted pages must exist).
 */
942 static void hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
944 unsigned long vsid, va, vpn, flags;
948 vsid = get_kernel_vsid(ea);
949 va = (vsid << 28) | (ea & 0x0fffffff);
950 vpn = va >> PAGE_SHIFT;
952 slot = hpte_find(vpn);
954 panic("could not find page to bolt\n");
955 hptep = htab_data.htab + slot;
957 set_pp_bit(newpp, hptep);
959 /* Ensure it is out of the tlb too */
960 spin_lock_irqsave(&pSeries_tlbie_lock, flags);
962 spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
/*
 * LPAR bolted-page PP update: locate the slot via rpa_lpar_hpte_find and
 * apply the change with H_PROTECT; panics on lookup or hcall failure.
 */
965 static void rpa_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
967 unsigned long lpar_rc;
968 unsigned long vsid, va, vpn, flags;
971 vsid = get_kernel_vsid(ea);
972 va = (vsid << 28) | (ea & 0x0fffffff);
973 vpn = va >> PAGE_SHIFT;
975 slot = rpa_lpar_hpte_find(vpn);
977 panic("updateboltedpp: Could not find page to bolt\n");
980 lpar_rc = plpar_pte_protect(flags, slot, 0);
982 if (lpar_rc != H_Success)
983 panic("Bad return code from pte bolted protect rc = %lx\n",
/*
 * iSeries bolted-page PP update: locate the slot via iSeries_hpte_find and
 * set the new PP bits through HvCallHpt_setPp; panics if not found.
 */
987 void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
989 unsigned long vsid,va,vpn;
992 vsid = get_kernel_vsid( ea );
993 va = ( vsid << 28 ) | ( ea & 0x0fffffff );
994 vpn = va >> PAGE_SHIFT;
996 slot = iSeries_hpte_find(vpn);
998 panic("updateboltedpp: Could not find page to bolt\n");
1000 HvCallHpt_setPp(slot, newpp);
1004 * Functions used to insert new hardware page table entries.
1005 * Will castout non-bolted entries as necessary using a random
1008 * Input : vpn : virtual page number
1009 * prpn : real page number in absolute space
1010 * hpteflags: page protection flags
1011 * bolted : 1 = bolt the page
1012 * large : 1 = large page (16M)
1013 * Output: hsss, where h = hash group, sss = slot within that group
/*
 * Native HPTE insert: try the primary group, then the secondary, and if
 * both are full evict a random non-bolted entry via hpte_remove.  Writes
 * dword1 first, eieio, then dword0 with the valid bit (which also releases
 * the per-HPTE lock), then ptesync.  Returns (secondary << 3) | slot.
 */
1015 static long hpte_insert(unsigned long vpn, unsigned long prpn,
1016 unsigned long hpteflags, int bolted, int large)
1022 unsigned long hash = hpt_hash(vpn, 0);
1023 unsigned long avpn = vpn >> 11;
1024 unsigned long arpn = physRpn_to_absRpn(prpn);
1025 unsigned long hpte_group;
1029 hpte_group = ((hash & htab_data.htab_hash_mask) *
1030 HPTES_PER_GROUP) & ~0x7UL;
1031 hptep = htab_data.htab + hpte_group;
/* Scan the primary group for an invalid slot. */
1033 for (i = 0; i < HPTES_PER_GROUP; i++) {
1034 dw0 = hptep->dw0.dw0;
1036 /* retry with lock held */
1037 dw0 = hptep->dw0.dw0;
1044 if (i == HPTES_PER_GROUP) {
/* Primary full: scan the secondary group (~hash). */
1046 hpte_group = ((~hash & htab_data.htab_hash_mask) *
1047 HPTES_PER_GROUP) & ~0x7UL;
1048 hptep = htab_data.htab + hpte_group;
1050 for (i = 0; i < HPTES_PER_GROUP; i++) {
1051 dw0 = hptep->dw0.dw0;
1053 /* retry with lock held */
1054 dw0 = hptep->dw0.dw0;
1060 if (i == HPTES_PER_GROUP) {
/* Both groups full: evict an entry from the primary group and retry. */
1062 hpte_group=((hash & htab_data.htab_hash_mask)*
1063 HPTES_PER_GROUP) & ~0x7UL;
1065 hpte_remove(hpte_group);
/* Assemble the new HPTE locally before publishing it. */
1070 lhpte.dw1.dword1 = 0;
1071 lhpte.dw1.dw1.rpn = arpn;
1072 lhpte.dw1.flags.flags = hpteflags;
1074 lhpte.dw0.dword0 = 0;
1075 lhpte.dw0.dw0.avpn = avpn;
1076 lhpte.dw0.dw0.h = secondary;
1077 lhpte.dw0.dw0.bolted = bolted;
1078 lhpte.dw0.dw0.v = 1;
1080 if (large) lhpte.dw0.dw0.l = 1;
1082 hptep->dw1.dword1 = lhpte.dw1.dword1;
1084 /* Guarantee the second dword is visible before the valid bit */
1085 __asm__ __volatile__ ("eieio" : : : "memory");
1088 * Now set the first dword including the valid bit
1089 * NOTE: this also unlocks the hpte
1091 hptep->dw0.dword0 = lhpte.dw0.dword0;
1093 __asm__ __volatile__ ("ptesync" : : : "memory");
1095 return ((secondary << 3) | i);
/*
 * LPAR HPTE insert: build the HPTE locally, then issue H_ENTER via inline
 * assembly (primary group first, secondary on H_PTEG_Full, eviction via
 * rpa_lpar_hpte_remove if both are full).  Returns (secondary << 3) | slot.
 */
1098 static long rpa_lpar_hpte_insert(unsigned long vpn, unsigned long prpn,
1099 unsigned long hpteflags,
1100 int bolted, int large)
1102 /* XXX fix for large page */
1103 unsigned long lpar_rc;
1104 unsigned long flags;
1108 unsigned long hash = hpt_hash(vpn, 0);
1109 unsigned long avpn = vpn >> 11;
1110 unsigned long arpn = physRpn_to_absRpn(prpn);
1111 unsigned long hpte_group;
1113 /* Fill in the local HPTE with absolute rpn, avpn and flags */
1114 lhpte.dw1.dword1 = 0;
1115 lhpte.dw1.dw1.rpn = arpn;
1116 lhpte.dw1.flags.flags = hpteflags;
1118 lhpte.dw0.dword0 = 0;
1119 lhpte.dw0.dw0.avpn = avpn;
1120 lhpte.dw0.dw0.bolted = bolted;
1121 lhpte.dw0.dw0.v = 1;
1123 if (large) lhpte.dw0.dw0.l = 1;
1125 /* Now fill in the actual HPTE */
1126 /* Set CEC cookie to 0 */
1127 /* Large page = 0 */
1129 /* I-cache Invalidate = 0 */
1130 /* I-cache synchronize = 0 */
1134 /* XXX why is this here? - Anton */
1135 /* -- Because at one point we hit a case where non cachable
1136 * pages where marked coherent & this is rejected by the HV.
1137 * Perhaps it is no longer an issue ... DRENG.
1139 if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
1140 lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
/* First attempt: primary hash group. */
1144 lhpte.dw0.dw0.h = secondary;
1145 hpte_group = ((hash & htab_data.htab_hash_mask) *
1146 HPTES_PER_GROUP) & ~0x7UL;
1148 __asm__ __volatile__ (
1157 : "=r" (lpar_rc), "=r" (slot)
1158 : "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
1159 "r" (lhpte.dw1.dword1)
1160 : "r0", "r3", "r4", "r5", "r6", "r7",
1161 "r8", "r9", "r10", "r11", "r12", "cc");
1163 if (lpar_rc == H_PTEG_Full) {
/* Primary full: retry in the secondary group. */
1165 lhpte.dw0.dw0.h = secondary;
1166 hpte_group = ((~hash & htab_data.htab_hash_mask) *
1167 HPTES_PER_GROUP) & ~0x7UL;
1169 __asm__ __volatile__ (
1178 : "=r" (lpar_rc), "=r" (slot)
1179 : "r" (flags), "r" (hpte_group), "r" (lhpte.dw0.dword0),
1180 "r" (lhpte.dw1.dword1)
1181 : "r0", "r3", "r4", "r5", "r6", "r7",
1182 "r8", "r9", "r10", "r11", "r12", "cc");
1183 if (lpar_rc == H_PTEG_Full) {
/* Both groups full: evict from the primary group and retry. */
1185 hpte_group=((hash & htab_data.htab_hash_mask)*
1186 HPTES_PER_GROUP) & ~0x7UL;
1188 rpa_lpar_hpte_remove(hpte_group);
1193 if (lpar_rc != H_Success)
1194 panic("Bad return code from pte enter rc = %lx\n", lpar_rc);
1196 return ((secondary << 3) | (slot & 0x7));
/*
 * iSeries HPTE insert: let the hypervisor find a free slot via
 * HvCallHpt_findValid, evicting via iSeries_hpte_remove when both groups
 * are full, then add the entry with HvCallHpt_addValidate.
 * Returns (secondary << 3) | slot.
 */
1199 static long iSeries_hpte_insert(unsigned long vpn, unsigned long prpn,
1200 unsigned long hpteflags,
1201 int bolted, int large)
1204 unsigned long hash, hpte_group;
1205 unsigned long avpn = vpn >> 11;
1206 unsigned long arpn = physRpn_to_absRpn( prpn );
1210 hash = hpt_hash(vpn, 0);
1213 slot = HvCallHpt_findValid(&lhpte, vpn);
1214 if (lhpte.dw0.dw0.v) {
1215 panic("select_hpte_slot found entry already valid\n");
1218 if (slot == -1) { /* No available entry found in either group */
1220 hpte_group=((hash & htab_data.htab_hash_mask)*
1221 HPTES_PER_GROUP) & ~0x7UL;
1223 hpte_group=((~hash & htab_data.htab_hash_mask)*
1224 HPTES_PER_GROUP) & ~0x7UL;
1227 hash = hpt_hash(vpn, 0);
1228 iSeries_hpte_remove(hpte_group);
1230 } else if (slot < 0) {
/* Negative but not -1: secondary-group flag set in bit 63 — strip it. */
1231 slot &= 0x7fffffffffffffff;
1235 /* Create the HPTE */
1236 lhpte.dw1.dword1 = 0;
1237 lhpte.dw1.dw1.rpn = arpn;
1238 lhpte.dw1.flags.flags = hpteflags;
1240 lhpte.dw0.dword0 = 0;
1241 lhpte.dw0.dw0.avpn = avpn;
1242 lhpte.dw0.dw0.h = secondary;
1243 lhpte.dw0.dw0.bolted = bolted;
1244 lhpte.dw0.dw0.v = 1;
1246 /* Now fill in the actual HPTE */
1247 HvCallHpt_addValidate(slot, secondary, (HPTE *)&lhpte);
1248 return ((secondary << 3) | (slot & 0x7));
1252 * Functions used to remove hardware page table entries.
1254 * Input : hpte_group: PTE index of the first entry in a group
1255 * Output: offset within the group of the entry removed or
/*
 * Native eviction: starting at a pseudo-random slot (timebase low bits),
 * find a valid non-bolted HPTE in the group, zero its dword0 (which also
 * unlocks it), reconstruct the VA from the AVPN/slot, and tlbie it.
 * Returns the offset within the group, or a failure value when all
 * entries are bolted (per the contract in the comment above).
 */
1258 static long hpte_remove(unsigned long hpte_group)
1264 unsigned long vsid, group, pi, pi_high;
1266 unsigned long flags;
1270 /* pick a random slot to start at */
1271 slot_offset = mftb() & 0x7;
1273 for (i = 0; i < HPTES_PER_GROUP; i++) {
1274 hptep = htab_data.htab + hpte_group + slot_offset;
1275 dw0 = hptep->dw0.dw0;
1277 if (dw0.v && !dw0.bolted) {
1278 /* retry with lock held */
1279 dw0 = hptep->dw0.dw0;
1280 if (dw0.v && !dw0.bolted)
1288 if (i == HPTES_PER_GROUP)
1293 /* Invalidate the hpte. NOTE: this also unlocks it */
1294 hptep->dw0.dword0 = 0;
1296 /* Invalidate the tlb */
/* Recover the page index from the AVPN and slot to rebuild the VA. */
1297 vsid = dw0.avpn >> 5;
1298 slot = hptep - htab_data.htab;
1302 pi = (vsid ^ group) & 0x7ff;
1303 pi_high = (dw0.avpn & 0x1f) << 11;
1307 va = pi << LARGE_PAGE_SHIFT;
1309 va = pi << PAGE_SHIFT;
1311 spin_lock_irqsave(&pSeries_tlbie_lock, flags);
1313 spin_unlock_irqrestore(&pSeries_tlbie_lock, flags);
/*
 * LPAR eviction: walk the group from a pseudo-random start, asking the
 * hypervisor to H_REMOVE each slot with H_ANDCOND on the bolted bit so
 * bolted entries are skipped (H_Not_Found); panics on other errors.
 */
1318 static long rpa_lpar_hpte_remove(unsigned long hpte_group)
1320 unsigned long slot_offset;
1321 unsigned long lpar_rc;
1323 unsigned long dummy1, dummy2;
1325 /* pick a random slot to start at */
1326 slot_offset = mftb() & 0x7;
1328 for (i = 0; i < HPTES_PER_GROUP; i++) {
1330 /* Don't remove a bolted entry */
1331 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
1332 (0x1UL << 4), &dummy1, &dummy2);
1334 if (lpar_rc == H_Success)
1337 if (lpar_rc != H_Not_Found)
1338 panic("Bad return code from pte remove rc = %lx\n",
/*
 * iSeries eviction: from a pseudo-random start, read word 0 of each slot
 * and invalidate the first non-bolted entry via the hypervisor.
 */
1348 static long iSeries_hpte_remove(unsigned long hpte_group)
1350 unsigned long slot_offset;
1354 /* Pick a random slot to start at */
1355 slot_offset = mftb() & 0x7;
1357 for (i = 0; i < HPTES_PER_GROUP; i++) {
1359 iSeries_hpte_getword0(hpte_group + slot_offset);
1361 if (!lhpte.dw0.dw0.bolted) {
1362 HvCallHpt_invalidateSetSwBitsGet(hpte_group +
/* Install the native (bare-metal pSeries) HPTE operations into ppc_md. */
1374 void hpte_init_pSeries(void)
1376 ppc_md.hpte_invalidate = hpte_invalidate;
1377 ppc_md.hpte_updatepp = hpte_updatepp;
1378 ppc_md.hpte_updateboltedpp = hpte_updateboltedpp;
1379 ppc_md.hpte_insert = hpte_insert;
1380 ppc_md.hpte_remove = hpte_remove;
/* Install the RPA/LPAR (hypervisor-call based) HPTE operations. */
1383 void pSeries_lpar_mm_init(void)
1385 ppc_md.hpte_invalidate = rpa_lpar_hpte_invalidate;
1386 ppc_md.hpte_updatepp = rpa_lpar_hpte_updatepp;
1387 ppc_md.hpte_updateboltedpp = rpa_lpar_hpte_updateboltedpp;
1388 ppc_md.hpte_insert = rpa_lpar_hpte_insert;
1389 ppc_md.hpte_remove = rpa_lpar_hpte_remove;
/* Install the iSeries (HvCallHpt based) HPTE operations. */
1392 void hpte_init_iSeries(void)
1394 ppc_md.hpte_invalidate = iSeries_hpte_invalidate;
1395 ppc_md.hpte_updatepp = iSeries_hpte_updatepp;
1396 ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp;
1397 ppc_md.hpte_insert = iSeries_hpte_insert;
1398 ppc_md.hpte_remove = iSeries_hpte_remove;