/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses on
 * systems with more than 4GB.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * $Id: pci-gart.c,v 1.32 2004/02/27 18:30:19 ak Exp $
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include "pci-x86_64.h"
unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;		/* Remapping table */
int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int force_mmu = 1;
#else
int force_mmu = 0;
#endif

/* Set to 0 (via iommu=nofullflush) to only flush the GART lazily when the
   allocator wraps; flushing on every mapping is safer but slower. */
int iommu_fullflush = 1;

dma_addr_t bad_dma_address;	/* returned by pci_map_single on overflow */

extern int fallback_aper_order;
extern int fallback_aper_force;

/* Bounds of the SWIOTLB bounce buffer used as a fallback without a GART */
extern char *io_tlb_start, *io_tlb_end;
/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
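/*
 * Illustrative sketch (not part of the original driver): how a 40-bit
 * physical address round-trips through a 32-bit GART PTE. Bits 12-31 stay
 * in place, bits 32-39 are stored in PTE bits 4-11. The function name
 * gpte_selftest is hypothetical.
 */
#if 0
static void gpte_selftest(void)
{
	u64 phys = 0x345678000ULL;		/* page-aligned 34-bit example address */
	u32 pte = GPTE_ENCODE(phys);		/* = 0x45678033: low bits + valid/coherent */
	BUG_ON(GPTE_DECODE(pte) != phys);	/* decode must restore the address */
}
#endif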
#define for_all_nb(dev) \
	pci_for_each_dev(dev) \
		if (dev->vendor == PCI_VENDOR_ID_AMD && dev->device == 0x1103 && \
		    dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
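/*
 * Illustrative sketch (not in the original source): for_all_nb expands to a
 * pci_for_each_dev loop filtered down to function 3 (miscellaneous control)
 * of each K8 northbridge, so a body can touch every node's GART registers.
 * The helper name is hypothetical.
 */
#if 0
static void nb_read_flush_words(void)
{
	struct pci_dev *dev;
	u32 w;
	for_all_nb(dev)
		pci_read_config_dword(dev, 0x9c, &w); /* e.g. read each flush word */
}
#endif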
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
extern int agp_init(void);
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit;	/* protected by iommu_bitmap_lock */

static struct pci_dev *northbridges[NR_CPUS + 1];
static u32 northbridge_flush_word[NR_CPUS + 1];
static int need_flush;		/* global flush state. set for each gart wrap */
static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap, next_bit, iommu_pages, size);
	if (offset == -1)
		offset = find_next_zero_string(iommu_gart_bitmap, 0, next_bit, size);
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset + size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = 1;
		}
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
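/*
 * Illustrative sketch (not in the original source): the allocator hands out
 * page-granular slots in the remapping area; a caller maps by filling the
 * matching GATT entries and frees by clearing them again. The helper name
 * and some_phys_addr are hypothetical.
 */
#if 0
static void gart_map_one_page(unsigned long some_phys_addr)
{
	unsigned long page = alloc_iommu(1);	/* reserve one GART page */
	if (page == -1)
		return;
	iommu_gatt_base[page] = GPTE_ENCODE(some_phys_addr);
	flush_gart();	/* make the new PTE visible to the GART TLB */
	/* device may now DMA through iommu_bus_base + (page << PAGE_SHIFT) */
	iommu_gatt_base[page] = 0;
	free_iommu(page, 1);
}
#endif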
/* Use global flush state to avoid races with multiple flushers. */
static void __flush_gart(void)
{
	unsigned long flags;
	int i, flushed = 0;
	u32 w;
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	/* recheck flush count inside lock */
	if (need_flush) {
		for (i = 0; northbridges[i]; i++) {
			pci_write_config_dword(northbridges[i], 0x9c,
					       northbridge_flush_word[i] | 1);
			do {	/* wait for the flush bit to self-clear */
				pci_read_config_dword(northbridges[i], 0x9c, &w);
			} while (w & 1);
			flushed++;
		}
		if (!flushed)
			printk("nothing to flush?\n");
		need_flush = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

static inline void flush_gart(void)
{
	if (need_flush)
		__flush_gart();
}
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	void *memory;
	int gfp = GFP_ATOMIC;
	int i, high, mmu;
	unsigned long iommu_page;

	if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || (no_iommu && !swiotlb))
		gfp |= GFP_DMA;

	/*
	 * First try to allocate continuous and use directly if already
	 * in lowmem.
	 */
	size = round_up(size, PAGE_SIZE);
	memory = (void *)__get_free_pages(gfp, get_order(size));
	if (memory == NULL)
		return NULL;

	high = ((unsigned long)virt_to_bus(memory) + size) > 0xffffffffUL;
	mmu = high;
	if (force_mmu && !(gfp & GFP_DMA))
		mmu = 1;
#ifdef CONFIG_SWIOTLB
	if (swiotlb && high && hwdev) {
		unsigned long dma_mask = 0;
		if (hwdev->dma_mask == ~0UL) {
			hwdev->dma_mask = 0xffffffff;
			dma_mask = ~0UL;
		}
		*dma_handle = swiotlb_map_single(hwdev, memory, size,
						 PCI_DMA_FROMDEVICE);
		if (dma_mask)
			hwdev->dma_mask = dma_mask;
		memset(phys_to_virt(*dma_handle), 0, size);
		free_pages((unsigned long)memory, get_order(size));
		return phys_to_virt(*dma_handle);
	}
#endif
	if (no_iommu) {
		if (high) goto error;
		mmu = 0;
	}
	memset(memory, 0, size);
	if (!mmu) {
		*dma_handle = virt_to_bus(memory);
		return memory;
	}

	iommu_page = alloc_iommu(size >> PAGE_SHIFT);
	if (iommu_page == -1)
		goto error;

	/* Fill in the GATT, allocating pages as needed. */
	for (i = 0; i < size >> PAGE_SHIFT; i++) {
		unsigned long phys_mem;
		void *mem = memory + i*PAGE_SIZE;
		if (i > 0)
			atomic_inc(&virt_to_page(mem)->count);
		phys_mem = virt_to_phys(mem);
		BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
	}
	flush_gart();

	*dma_handle = iommu_bus_base + (iommu_page << PAGE_SHIFT);
	return memory;

 error:
	free_pages((unsigned long)memory, get_order(size));
	return NULL;
}
/*
 * Unmap consistent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	unsigned long iommu_page;
	int i;

	size = round_up(size, PAGE_SIZE);
#ifdef CONFIG_SWIOTLB
	/* Overlap should not happen */
	if (swiotlb && vaddr >= (void *)io_tlb_start &&
	    vaddr < (void *)io_tlb_end) {
		swiotlb_unmap_single(hwdev, bus, size, PCI_DMA_TODEVICE);
		return;
	}
#endif
	if (bus >= iommu_bus_base && bus < iommu_bus_base + iommu_size) {
		unsigned pages = size >> PAGE_SHIFT;
		iommu_page = (bus - iommu_bus_base) >> PAGE_SHIFT;
		vaddr = __va(GPTE_DECODE(iommu_gatt_base[iommu_page]));
		for (i = 0; i < pages; i++) {
			u64 pte = iommu_gatt_base[iommu_page + i];
			BUG_ON((pte & GPTE_VALID) == 0);
			iommu_gatt_base[iommu_page + i] = 0;
		}
		free_iommu(iommu_page, pages);
	}
	free_pages((unsigned long)vaddr, get_order(size));
}
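/*
 * Illustrative sketch (not in the original source): typical driver usage of
 * the consistent API above. mydev, setup_ring and the 4096-byte ring are
 * hypothetical.
 */
#if 0
static int setup_ring(struct pci_dev *mydev)
{
	dma_addr_t ring_dma;
	void *ring = pci_alloc_consistent(mydev, 4096, &ring_dma);
	if (!ring)
		return -1;
	/* ... program ring_dma into the device, access ring from the CPU ... */
	pci_free_consistent(mydev, 4096, ring, ring_dma);
	return 0;
}
#endif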
#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;	/* how many pages to dump on overflow */
extern unsigned long printk_address(unsigned long);

static void dump_leak(void)
{
	int i;
	static int dump;

	if (dump || !iommu_leak_tab)
		return;
	dump = 1;
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 1; i <= iommu_leak_pages; i++) {
		printk("%lu: ", iommu_pages-i);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
		printk("%c", (i % 2) == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#endif
static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed out.
	 * Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Error: ran out of IOMMU space for %p size %lu at device %s[%s]\n",
	       addr, size, dev ? dev->name : "?", dev ? dev->slot_name : "?");

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory will be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory will be DMAed out\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}
static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = (~mask & (u64)(addr + size)) != 0;
	if (no_iommu && high)
		panic("pci_map_single: high address but no IOMMU.\n");
	return force_mmu || (high && !no_iommu);
}
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,
			  int dir)
{
	unsigned long iommu_page;
	unsigned long phys_mem, bus;
	int i, npages;

	BUG_ON(dir == PCI_DMA_NONE);

#ifdef CONFIG_SWIOTLB
	if (swiotlb)
		return swiotlb_map_single(dev, addr, size, dir);
#endif

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	npages = round_up(size + ((u64)addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;

	iommu_page = alloc_iommu(npages);
	if (iommu_page == -1) {
		iommu_full(dev, addr, size, dir);
		return iommu_bus_base;
	}

	phys_mem &= PAGE_MASK;
	for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
		BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);

		/*
		 * Set coherent mapping here to avoid needing to flush
		 * the caches on mapping.
		 */
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);

#ifdef CONFIG_IOMMU_LEAK
		/* XXX need eventually caller of pci_map_sg */
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = __builtin_return_address(0);
#endif
	}
	flush_gart();

	bus = iommu_bus_base + iommu_page*PAGE_SIZE;
	return bus + ((unsigned long)addr & ~PAGE_MASK);
}
/*
 * Free a temporary PCI mapping.
 */
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int i, npages;

#ifdef CONFIG_SWIOTLB
	if (swiotlb) {
		swiotlb_unmap_single(hwdev, dma_addr, size, direction);
		return;
	}
#endif

	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = 0;
#ifdef CONFIG_IOMMU_LEAK
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = 0;
#endif
	}
	free_iommu(iommu_page, npages);
}

EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);
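/*
 * Illustrative sketch (not in the original source): the streaming API pairs
 * one map with one unmap per I/O. mydev, buf, len and the helper name are
 * hypothetical.
 */
#if 0
static void xmit_buf(struct pci_dev *mydev, void *buf, size_t len)
{
	dma_addr_t dma = pci_map_single(mydev, buf, len, PCI_DMA_TODEVICE);
	/* ... start the DMA and wait for the device to finish ... */
	pci_unmap_single(mydev, dma, len, PCI_DMA_TODEVICE);
}
#endif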
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;
	if (!iommu_size)
		iommu_size = aper_size;
	/* Round the end of the remapping area down to a large page boundary. */
	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
		       "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",
		       iommu_size>>20);
	return iommu_size;
}
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32, aper_order;
	u64 aper_base;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = (u64)(aper_base_32 & 0x7fff) << 25;
	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
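/*
 * Illustrative sketch (not in the original source): the order field from
 * config register 0x90 selects a power-of-two aperture size starting at
 * 32MB, and the base field is in 32MB units. The values and helper name
 * below are hypothetical examples.
 */
#if 0
static void aperture_decode_example(void)
{
	u32 order = 2;				 /* example size field value */
	u32 size = (32 * 1024 * 1024) << order;	 /* order 2 -> 128MB */
	u64 base = (u64)(0x0040 & 0x7fff) << 25; /* base field 0x40 -> 2GB */
}
#endif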
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;
		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	change_page_attr(virt_to_page(gatt), gatt_size/PAGE_SIZE, PAGE_KERNEL_NOCACHE);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl, gatt_reg;

		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;		/* GART enable */
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}

	printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
	return 0;

 nommu:
	/* XXX: reject 0xffffffff mask now in pci mapping functions */
	if (end_pfn >= 0xffffffff>>PAGE_SHIFT)
		printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
		       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.");
	return -1;
}
void __init pci_iommu_init(void)
{
	agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;

#ifdef CONFIG_AGP
	no_agp = no_agp || (agp_init() < 0) || (agp_copy_info(&info) < 0);
#else
	no_agp = 1;
#endif

#ifdef CONFIG_SWIOTLB
	if (swiotlb) {
		no_iommu = 1;
		printk(KERN_INFO "PCI-DMA: Using SWIOTLB\n");
		return;
	}
#endif

	if (no_iommu || (!force_mmu && end_pfn < 0xffffffff>>PAGE_SHIFT) ||
	    !iommu_aperture) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return;
	}

	if (no_agp) {
		int err = -1;
		printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
		if (force_mmu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
			err = init_k8_gatt(&info);
		if (err < 0) {
			printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
			no_iommu = 1;
			return;
		}
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *)__get_free_pages(GFP_KERNEL,
						     get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages*sizeof(void *));
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
	bad_dma_address = iommu_bus_base;

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is always mapped
	 * with cache enabled and there is no full cache coherency across the GART
	 * remapping. The unmapping avoids automatic prefetches from the CPU
	 * allocating cache lines in there. All CPU accesses are done via the
	 * direct mapping to the backing memory. The GART address is only used
	 * by PCI devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= NR_CPUS)
			continue;
		northbridges[cpu] = dev;

		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	/* Flush CPU caches so no stale lines alias the now-unmapped area. */
	asm volatile("wbinvd" ::: "memory");

	need_flush = 1;
	flush_gart();
}
/*
 * iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,nofullflush]
 * size		set size of IOMMU (in bytes)
 * noagp	don't initialize the AGP driver and use full aperture.
 * off		don't use the IOMMU
 * leak		turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
 * memaper[=order] allocate its own aperture over RAM with size 32MB<<order.
 * noforce	don't force IOMMU usage. Default.
 * force	Force IOMMU for all devices.
 * nofullflush	only flush the GART lazily when the allocator wraps instead of
 *		on every mapping (optimized flushing; may break on some devices).
 */
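/*
 * Example command lines (illustrative, not from the original source):
 *   iommu=force,memaper=2   force the IOMMU on and allocate a 128MB
 *                           (32MB<<2) fallback aperture over RAM
 *   iommu=off               disable the GART IOMMU completely
 *   iommu=leak=10           with CONFIG_IOMMU_LEAK, trace leaks and dump
 *                           10 pages on overflow
 */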
__init int iommu_setup(char *opt)
{
	int arg;
	char *p = opt;

	for (;;) {
		if (!memcmp(p, "noagp", 5))
			no_agp = 1;
		if (!memcmp(p, "off", 3))
			no_iommu = 1;
		if (!memcmp(p, "force", 5))
			force_mmu = 1;
		if (!memcmp(p, "noforce", 7))
			force_mmu = 0;
		if (!memcmp(p, "nofullflush", 11))
			iommu_fullflush = 0;
		if (!memcmp(p, "memaper", 7)) {
			fallback_aper_force = 1;
			p += 7;
			if (*p == '=') {
				++p;
				if (get_option(&p, &arg))
					fallback_aper_order = arg;
			}
		}
#ifdef CONFIG_IOMMU_LEAK
		if (!memcmp(p, "leak", 4)) {
			leak_trace = 1;
			p += 4;
			if (*p == '=') ++p;
			if (isdigit(*p) && get_option(&p, &arg))
				iommu_leak_pages = arg;
		} else
#endif
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_size = arg;
		do {
			if (*p == ' ' || *p == 0)
				return 0;
		} while (*p++ != ',');
	}
	return 1;
}