Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched

diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 66c5bb5..7e4a785 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
 /* internal functions */
 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
+static struct ioat_desc_sw *
+ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
 
-static struct ioat_dma_chan *ioat_lookup_chan_by_index(struct ioatdma_device *device,
-                                                      int index)
+static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
+                                               struct ioatdma_device *device,
+                                               int index)
 {
        return device->idx[index];
 }
@@ -148,57 +151,102 @@ static void ioat_set_src(dma_addr_t addr,
                         struct dma_async_tx_descriptor *tx,
                         int index)
 {
-       struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-
-       pci_unmap_addr_set(desc, src, addr);
-
-       list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
-               iter->hw->src_addr = addr;
-               addr += ioat_chan->xfercap;
-       }
-
+       tx_to_ioat_desc(tx)->src = addr;
 }
 
 static void ioat_set_dest(dma_addr_t addr,
                          struct dma_async_tx_descriptor *tx,
                          int index)
 {
-       struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-
-       pci_unmap_addr_set(desc, dst, addr);
-
-       list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
-               iter->hw->dst_addr = addr;
-               addr += ioat_chan->xfercap;
-       }
+       tx_to_ioat_desc(tx)->dst = addr;
 }
 
 static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-       struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+       struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+       struct ioat_desc_sw *prev, *new;
+       struct ioat_dma_descriptor *hw;
        int append = 0;
        dma_cookie_t cookie;
-       struct ioat_desc_sw *group_start;
+       LIST_HEAD(new_chain);
+       u32 copy;
+       size_t len;
+       dma_addr_t src, dst;
+       int orig_ack;
+       unsigned int desc_count = 0;
+
+       /* src, dest, and len are stored in the initial descriptor */
+       len = first->len;
+       src = first->src;
+       dst = first->dst;
+       orig_ack = first->async_tx.ack;
+       new = first;
 
-       group_start = list_entry(desc->async_tx.tx_list.next,
-                                struct ioat_desc_sw, node);
        spin_lock_bh(&ioat_chan->desc_lock);
+       prev = to_ioat_desc(ioat_chan->used_desc.prev);
+       prefetch(prev->hw);
+       do {
+               copy = min((u32) len, ioat_chan->xfercap);
+
+               new->async_tx.ack = 1;
+
+               hw = new->hw;
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dst;
+               hw->next = 0;
+
+               /* chain together the physical address list for the HW */
+               wmb();
+               prev->hw->next = (u64) new->async_tx.phys;
+
+               len -= copy;
+               dst += copy;
+               src += copy;
+
+               list_add_tail(&new->node, &new_chain);
+               desc_count++;
+               prev = new;
+       } while (len && (new = ioat_dma_get_next_descriptor(ioat_chan)));
+
+       hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+       if (new->async_tx.callback) {
+               hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+               if (first != new) {
+                       /* move callback into the last desc */
+                       new->async_tx.callback = first->async_tx.callback;
+                       new->async_tx.callback_param
+                                       = first->async_tx.callback_param;
+                       first->async_tx.callback = NULL;
+                       first->async_tx.callback_param = NULL;
+               }
+       }
+
+       new->tx_cnt = desc_count;
+       new->async_tx.ack = orig_ack; /* client is in control of this ack */
+
+       /* store the original values for use in later cleanup */
+       if (new != first) {
+               new->src = first->src;
+               new->dst = first->dst;
+               new->len = first->len;
+       }
+
        /* cookie incr and addition to used_list must be atomic */
        cookie = ioat_chan->common.cookie;
        cookie++;
        if (cookie < 0)
                cookie = 1;
-       ioat_chan->common.cookie = desc->async_tx.cookie = cookie;
+       ioat_chan->common.cookie = new->async_tx.cookie = cookie;
 
        /* write address into NextDescriptor field of last desc in chain */
        to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
-                                               group_start->async_tx.phys;
-       list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);
+                                                       first->async_tx.phys;
+       __list_splice(&new_chain, ioat_chan->used_desc.prev);
 
-       ioat_chan->pending += desc->tx_cnt;
+       ioat_chan->pending += desc_count;
        if (ioat_chan->pending >= 4) {
                append = 1;
                ioat_chan->pending = 0;
@@ -267,7 +315,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
        chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
        if (chanerr) {
                dev_err(&ioat_chan->device->pdev->dev,
-                       "ioatdma: CHANERR = %x, clearing\n", chanerr);
+                       "CHANERR = %x, clearing\n", chanerr);
                writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
        }
 
@@ -276,7 +324,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
                desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
                if (!desc) {
                        dev_err(&ioat_chan->device->pdev->dev,
-                               "ioatdma: Only %d initial descriptors\n", i);
+                               "Only %d initial descriptors\n", i);
                        break;
                }
                list_add_tail(&desc->node, &tmp_list);
@@ -342,12 +390,13 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
        /* one is ok since we left it on there on purpose */
        if (in_use_descs > 1)
                dev_err(&ioat_chan->device->pdev->dev,
-                       "ioatdma: Freeing %d in use descriptors!\n",
+                       "Freeing %d in use descriptors!\n",
                        in_use_descs - 1);
 
        ioat_chan->last_completion = ioat_chan->completion_addr = 0;
        ioat_chan->pending = 0;
 }
+
 /**
  * ioat_dma_get_next_descriptor - return the next available descriptor
  * @ioat_chan: IOAT DMA channel handle
@@ -356,8 +405,8 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
  * channel's desc_lock held.  Allocates more descriptors if the channel
  * has run out.
  */
-static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
-                                               struct ioat_dma_chan *ioat_chan)
+static struct ioat_desc_sw *
+ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
 {
        struct ioat_desc_sw *new = NULL;
 
@@ -382,51 +431,11 @@ static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
                                                int int_en)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *first, *prev, *new;
-       LIST_HEAD(new_chain);
-       u32 copy;
-       size_t orig_len;
-       int desc_count = 0;
-
-       if (!len)
-               return NULL;
-
-       orig_len = len;
-
-       first = NULL;
-       prev = NULL;
+       struct ioat_desc_sw *new;
 
        spin_lock_bh(&ioat_chan->desc_lock);
-       while (len) {
-               new = ioat_dma_get_next_descriptor(ioat_chan);
-               copy = min((u32) len, ioat_chan->xfercap);
-
-               new->hw->size = copy;
-               new->hw->ctl = 0;
-               new->async_tx.cookie = 0;
-               new->async_tx.ack = 1;
-
-               /* chain together the physical address list for the HW */
-               if (!first)
-                       first = new;
-               else
-                       prev->hw->next = (u64) new->async_tx.phys;
-
-               prev = new;
-               len  -= copy;
-               list_add_tail(&new->node, &new_chain);
-               desc_count++;
-       }
-
-       list_splice(&new_chain, &new->async_tx.tx_list);
-
-       new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       new->hw->next = 0;
-       new->tx_cnt = desc_count;
-       new->async_tx.ack = 0; /* client is in control of this ack */
-       new->async_tx.cookie = -EBUSY;
-
-       pci_unmap_len_set(new, len, orig_len);
+       new = ioat_dma_get_next_descriptor(ioat_chan);
+       new->len = len;
        spin_unlock_bh(&ioat_chan->desc_lock);
 
        return new ? &new->async_tx : NULL;
@@ -464,7 +473,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
 
        prefetch(ioat_chan->completion_virt);
 
-       if (!spin_trylock(&ioat_chan->cleanup_lock))
+       if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
                return;
 
        /* The completion writeback can happen at any time,
@@ -474,22 +483,25 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
 
 #if (BITS_PER_LONG == 64)
        phys_complete =
-       ioat_chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+               ioat_chan->completion_virt->full
+               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
 #else
-       phys_complete = ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
+       phys_complete =
+               ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
 #endif
 
-       if ((ioat_chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
+       if ((ioat_chan->completion_virt->full
+               & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
                                IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
                dev_err(&ioat_chan->device->pdev->dev,
-                       "ioatdma: Channel halted, chanerr = %x\n",
+                       "Channel halted, chanerr = %x\n",
                        readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
 
                /* TODO do something to salvage the situation */
        }
 
        if (phys_complete == ioat_chan->last_completion) {
-               spin_unlock(&ioat_chan->cleanup_lock);
+               spin_unlock_bh(&ioat_chan->cleanup_lock);
                return;
        }
 
@@ -517,6 +529,11 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                                        pci_unmap_addr(desc, src),
                                        pci_unmap_len(desc, len),
                                        PCI_DMA_TODEVICE);
+                       if (desc->async_tx.callback) {
+                               desc->async_tx.callback(
+                                               desc->async_tx.callback_param);
+                               desc->async_tx.callback = NULL;
+                       }
                }
 
                if (desc->async_tx.phys != phys_complete) {
@@ -548,7 +565,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
        if (cookie != 0)
                ioat_chan->completed_cookie = cookie;
 
-       spin_unlock(&ioat_chan->cleanup_lock);
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
 }
 
 static void ioat_dma_dependency_added(struct dma_chan *chan)
@@ -613,8 +630,13 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
        spin_lock_bh(&ioat_chan->desc_lock);
 
        desc = ioat_dma_get_next_descriptor(ioat_chan);
-       desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+       desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
+                               | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
+                               | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
        desc->hw->next = 0;
+       desc->hw->size = 0;
+       desc->hw->src_addr = 0;
+       desc->hw->dst_addr = 0;
        desc->async_tx.ack = 1;
 
        list_add_tail(&desc->node, &ioat_chan->used_desc);
@@ -633,6 +655,12 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
  */
 #define IOAT_TEST_SIZE 2000
 
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+       printk(KERN_ERR "ioatdma: ioat_dma_test_callback(%p)\n",
+                       dma_async_param);
+}
+
 /**
  * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
  * @device: device to be tested
@@ -643,7 +671,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
        u8 *src;
        u8 *dest;
        struct dma_chan *dma_chan;
-       struct dma_async_tx_descriptor *tx;
+       struct dma_async_tx_descriptor *tx = NULL;
        dma_addr_t addr;
        dma_cookie_t cookie;
        int err = 0;
@@ -673,6 +701,13 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
        }
 
        tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
+       if (!tx) {
+               dev_err(&device->pdev->dev,
+                       "Self-test prep failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
        async_tx_ack(tx);
        addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
                        DMA_TO_DEVICE);
@@ -680,19 +715,27 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
        addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
                        DMA_FROM_DEVICE);
        ioat_set_dest(addr, tx, 0);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = (void *)0x8086;
        cookie = ioat_tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(&device->pdev->dev,
+                       "Self-test setup failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
        ioat_dma_memcpy_issue_pending(dma_chan);
        msleep(1);
 
        if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
                dev_err(&device->pdev->dev,
-                       "ioatdma: Self-test copy timed out, disabling\n");
+                       "Self-test copy timed out, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }
        if (memcmp(src, dest, IOAT_TEST_SIZE)) {
                dev_err(&device->pdev->dev,
-                       "ioatdma: Self-test copy failed compare, disabling\n");
+                       "Self-test copy failed compare, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }
@@ -730,6 +773,9 @@ static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
                goto msi;
        if (!strcmp(ioat_interrupt_style, "intx"))
                goto intx;
+       dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
+               ioat_interrupt_style);
+       goto err_no_irq;
 
 msix:
        /* The number of MSI-X vectors should equal the number of channels */
@@ -906,9 +952,9 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
        device->common.device_dependency_added = ioat_dma_dependency_added;
        device->common.dev = &pdev->dev;
        dev_err(&device->pdev->dev,
-               "ioatdma: Intel(R) I/OAT DMA Engine found,"
-               " %d channels, device version 0x%02x\n",
-               device->common.chancnt, device->version);
+               "Intel(R) I/OAT DMA Engine found,"
+               " %d channels, device version 0x%02x, driver version %s\n",
+               device->common.chancnt, device->version, IOAT_DMA_VERSION);
 
        err = ioat_dma_setup_interrupts(device);
        if (err)
@@ -931,9 +977,8 @@ err_completion_pool:
 err_dma_pool:
        kfree(device);
 err_kzalloc:
-       iounmap(iobase);
        dev_err(&device->pdev->dev,
-               "ioatdma: Intel(R) I/OAT DMA Engine initialization failed\n");
+               "Intel(R) I/OAT DMA Engine initialization failed\n");
        return NULL;
 }
 
@@ -942,13 +987,17 @@ void ioat_dma_remove(struct ioatdma_device *device)
        struct dma_chan *chan, *_chan;
        struct ioat_dma_chan *ioat_chan;
 
-       dma_async_device_unregister(&device->common);
-
        ioat_dma_remove_interrupts(device);
 
+       dma_async_device_unregister(&device->common);
+
        pci_pool_destroy(device->dma_pool);
        pci_pool_destroy(device->completion_pool);
 
+       iounmap(device->reg_base);
+       pci_release_regions(device->pdev);
+       pci_disable_device(device->pdev);
+
        list_for_each_entry_safe(chan, _chan,
                                 &device->common.channels, device_node) {
                ioat_chan = to_ioat_chan(chan);