Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[powerpc.git] / arch / x86_64 / kernel / pci-calgary.c
index d91cb84..cfb09b0 100644 (file)
@@ -1,9 +1,11 @@
 /*
  * Derived from arch/powerpc/kernel/iommu.c
  *
- * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
- * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ * Copyright (C) IBM Corporation, 2006
  *
+ * Author: Jon Mason <jdmason@us.ibm.com>
+ * Author: Muli Ben-Yehuda <muli@il.ibm.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -83,7 +85,9 @@
 #define CSR_AGENT_MASK         0xffe0ffff
 
 #define MAX_NUM_OF_PHBS                8 /* how many PHBs in total? */
-#define MAX_PHB_BUS_NUM                (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
+#define MAX_NUM_CHASSIS                8 /* max number of chassis */
+/* dev->bus->number is expected to be below MAX_PHB_BUS_NUM (exclusive bound) */
+#define MAX_PHB_BUS_NUM                (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
 #define PHBS_PER_CALGARY       4
 
 /* register offsets in Calgary's internal register space */
@@ -108,30 +112,49 @@ static const unsigned long phb_offsets[] = {
        0xB000 /* PHB3 */
 };
 
-void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
 unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
 static int translate_empty_slots __read_mostly = 0;
 static int calgary_detected __read_mostly = 0;
 
-/*
- * the bitmap of PHBs the user requested that we disable
- * translation on.
- */
-static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
+struct calgary_bus_info {
+       void *tce_space;
+       unsigned char translation_disabled;
+       signed char phbid;
+};
+
+static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
 static void tce_cache_blast(struct iommu_table *tbl);
 
 /* enable this to stress test the chip's TCE cache */
 #ifdef CONFIG_IOMMU_DEBUG
-static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+int debugging __read_mostly = 1;
+
+static inline unsigned long verify_bit_range(unsigned long* bitmap,
+       int expected, unsigned long start, unsigned long end)
 {
-       tce_cache_blast(tbl);
+       unsigned long idx = start;
+
+       BUG_ON(start >= end);
+
+       while (idx < end) {
+               if (!!test_bit(idx, bitmap) != expected)
+                       return idx;
+               ++idx;
+       }
+
+       /* all bits have the expected value */
+       return ~0UL;
 }
-#else
-static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+#else /* debugging is disabled */
+int debugging __read_mostly = 0;
+
+static inline unsigned long verify_bit_range(unsigned long* bitmap,
+       int expected, unsigned long start, unsigned long end)
 {
+       return ~0UL;
 }
-#endif /* BLAST_TCE_CACHE_ON_UNMAP */
+#endif /* CONFIG_IOMMU_DEBUG */
 
 static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
 {
@@ -145,7 +168,7 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
 
 static inline int translate_phb(struct pci_dev* dev)
 {
-       int disabled = test_bit(dev->bus->number, translation_disabled);
+       int disabled = bus_info[dev->bus->number].translation_disabled;
        return !disabled;
 }
 
@@ -154,6 +177,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 {
        unsigned long index;
        unsigned long end;
+       unsigned long badbit;
 
        index = start_addr >> PAGE_SHIFT;
 
@@ -165,14 +189,15 @@ static void iommu_range_reserve(struct iommu_table *tbl,
        if (end > tbl->it_size) /* don't go off the table */
                end = tbl->it_size;
 
-       while (index < end) {
-               if (test_bit(index, tbl->it_map))
+       badbit = verify_bit_range(tbl->it_map, 0, index, end);
+       if (badbit != ~0UL) {
+               if (printk_ratelimit())
                        printk(KERN_ERR "Calgary: entry already allocated at "
                               "0x%lx tbl %p dma 0x%lx npages %u\n",
-                              index, tbl, start_addr, npages);
-               ++index;
+                              badbit, tbl, start_addr, npages);
        }
-       set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
+
+       set_bit_string(tbl->it_map, index, npages);
 }
 
 static unsigned long iommu_range_alloc(struct iommu_table *tbl,
@@ -239,7 +264,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
        unsigned int npages)
 {
        unsigned long entry;
-       unsigned long i;
+       unsigned long badbit;
 
        entry = dma_addr >> PAGE_SHIFT;
 
@@ -247,16 +272,15 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 
        tce_free(tbl, entry, npages);
 
-       for (i = 0; i < npages; ++i) {
-               if (!test_bit(entry + i, tbl->it_map))
+       badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
+       if (badbit != ~0UL) {
+               if (printk_ratelimit())
                        printk(KERN_ERR "Calgary: bit is off at 0x%lx "
                               "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
-                              entry + i, tbl, dma_addr, entry, npages);
+                              badbit, tbl, dma_addr, entry, npages);
        }
 
        __clear_bit_string(tbl->it_map, entry, npages);
-
-       tce_cache_blast_stress(tbl);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -450,7 +474,7 @@ static struct dma_mapping_ops calgary_dma_ops = {
 
 static inline int busno_to_phbid(unsigned char num)
 {
-       return bus_to_phb(num) % PHBS_PER_CALGARY;
+       return bus_info[num].phbid;
 }
 
 static inline unsigned long split_queue_offset(unsigned char num)
@@ -627,6 +651,10 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
        if (ret)
                return ret;
 
+       tbl = dev->sysdata;
+       tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
+       tce_free(tbl, 0, tbl->it_size);
+
        calgary_reserve_regions(dev);
 
        /* set TARs for each PHB */
@@ -650,11 +678,12 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
        return 0;
 }
 
-static void __init calgary_free_tar(struct pci_dev *dev)
+static void __init calgary_free_bus(struct pci_dev *dev)
 {
        u64 val64;
        struct iommu_table *tbl = dev->sysdata;
        void __iomem *target;
+       unsigned int bitmapsz;
 
        target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
        val64 = be64_to_cpu(readq(target));
@@ -662,8 +691,15 @@ static void __init calgary_free_tar(struct pci_dev *dev)
        writeq(cpu_to_be64(val64), target);
        readq(target); /* flush */
 
+       bitmapsz = tbl->it_size / BITS_PER_BYTE;
+       free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
+       tbl->it_map = NULL;
+
        kfree(tbl);
        dev->sysdata = NULL;
+
+       /* Can't free bootmem allocated memory after system is up :-( */
+       bus_info[dev->bus->number].tce_space = NULL;
 }
 
 static void calgary_watchdog(unsigned long data)
@@ -768,12 +804,11 @@ static inline unsigned int __init locate_register_space(struct pci_dev *dev)
        return address;
 }
 
-static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
+static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 {
+       pci_dev_get(dev);
        dev->sysdata = NULL;
        dev->bus->self = dev;
-
-       return 0;
 }
 
 static int __init calgary_init_one(struct pci_dev *dev)
@@ -794,6 +829,7 @@ static int __init calgary_init_one(struct pci_dev *dev)
        if (ret)
                goto iounmap;
 
+       pci_dev_get(dev);
        dev->bus->self = dev;
        calgary_enable_translation(dev);
 
@@ -810,7 +846,7 @@ static int __init calgary_init(void)
        int i, ret = -ENODEV;
        struct pci_dev *dev = NULL;
 
-       for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
+       for (i = 0; i < MAX_PHB_BUS_NUM; i++) {
                dev = pci_get_device(PCI_VENDOR_ID_IBM,
                                     PCI_DEVICE_ID_IBM_CALGARY,
                                     dev);
@@ -820,10 +856,9 @@ static int __init calgary_init(void)
                        calgary_init_one_nontraslated(dev);
                        continue;
                }
-               if (!tce_table_kva[i] && !translate_empty_slots) {
-                       pci_dev_put(dev);
+               if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
                        continue;
-               }
+
                ret = calgary_init_one(dev);
                if (ret)
                        goto error;
@@ -836,15 +871,18 @@ error:
                dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
                                              PCI_DEVICE_ID_IBM_CALGARY,
                                              dev);
+               if (!dev)
+                       break;
                if (!translate_phb(dev)) {
                        pci_dev_put(dev);
                        continue;
                }
-               if (!tce_table_kva[i] && !translate_empty_slots)
+               if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
                        continue;
+
                calgary_disable_translation(dev);
-               calgary_free_tar(dev);
-               pci_dev_put(dev);
+               calgary_free_bus(dev);
+               pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
        }
 
        return ret;
@@ -874,9 +912,10 @@ static inline int __init determine_tce_table_size(u64 ram)
 void __init detect_calgary(void)
 {
        u32 val;
-       int bus, table_idx;
+       int bus;
        void *tbl;
-       int detected = 0;
+       int calgary_found = 0;
+       int phb = -1;
 
        /*
         * if the user specified iommu=off or iommu=soft or we found
@@ -885,51 +924,63 @@ void __init detect_calgary(void)
        if (swiotlb || no_iommu || iommu_detected)
                return;
 
+       if (!early_pci_allowed())
+               return;
+
        specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
 
-       for (bus = 0, table_idx = 0;
-            bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
-            bus++) {
-               BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
+       for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+               int dev;
+               struct calgary_bus_info *info = &bus_info[bus];
+               info->phbid = -1;
+
                if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
                        continue;
-               if (test_bit(bus, translation_disabled)) {
-                       printk(KERN_INFO "Calgary: translation is disabled for "
-                              "PHB 0x%x\n", bus);
-                       /* skip this phb, don't allocate a tbl for it */
-                       tce_table_kva[table_idx] = NULL;
-                       table_idx++;
+
+               /*
+                * There are 4 PHBs per Calgary chip.  Set phb to the index (0-3) of
+                * this PHB relative to its Calgary chip.
+                */
+               phb = (phb + 1) % PHBS_PER_CALGARY;
+
+               if (info->translation_disabled)
                        continue;
-               }
+
                /*
-                * scan the first slot of the PCI bus to see if there
-                * are any devices present
+                * Scan the slots of the PCI bus to see if there is a device present.
+                * The parent bus will be the zeroth device, so start at 1.
                 */
-               val = read_pci_config(bus, 1, 0, 0);
-               if (val != 0xffffffff || translate_empty_slots) {
-                       tbl = alloc_tce_table();
-                       if (!tbl)
-                               goto cleanup;
-                       detected = 1;
-               } else
-                       tbl = NULL;
-
-               tce_table_kva[table_idx] = tbl;
-               table_idx++;
+               for (dev = 1; dev < 8; dev++) {
+                       val = read_pci_config(bus, dev, 0, 0);
+                       if (val != 0xffffffff || translate_empty_slots) {
+                               tbl = alloc_tce_table();
+                               if (!tbl)
+                                       goto cleanup;
+                               info->tce_space = tbl;
+                               info->phbid = phb;
+                               calgary_found = 1;
+                               break;
+                       }
+               }
        }
 
-       if (detected) {
+       if (calgary_found) {
                iommu_detected = 1;
                calgary_detected = 1;
-               printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
-                      "TCE table spec is %d.\n", specified_table_size);
+               printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
+               printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
+                      "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
+                      debugging ? "enabled" : "disabled");
        }
        return;
 
 cleanup:
-       for (--table_idx; table_idx >= 0; --table_idx)
-               if (tce_table_kva[table_idx])
-                       free_tce_table(tce_table_kva[table_idx]);
+       for (--bus; bus >= 0; --bus) {
+               struct calgary_bus_info *info = &bus_info[bus];
+
+               if (info->tce_space)
+                       free_tce_table(info->tce_space);
+       }
 }
 
 int __init calgary_iommu_init(void)
@@ -1000,10 +1051,10 @@ static int __init calgary_parse_options(char *p)
                        if (p == endp)
                                break;
 
-                       if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
+                       if (bridge < MAX_PHB_BUS_NUM) {
                                printk(KERN_INFO "Calgary: disabling "
                                       "translation for PHB 0x%x\n", bridge);
-                               set_bit(bridge, translation_disabled);
+                               bus_info[bridge].translation_disabled = 1;
                        }
                }