ahci: consider SDB FIS containing spurious NCQ completions HSM violation (regenerated)
author: Tejun Heo <htejun@gmail.com>
Wed, 21 Feb 2007 07:34:25 +0000 (16:34 +0900)
committer: Jeff Garzik <jeff@garzik.org>
Fri, 23 Feb 2007 10:37:10 +0000 (05:37 -0500)
An SDB FIS containing spurious NCQ completions is a clear protocol
violation.  Currently, only some Maxtor drives with early firmware
revisions show this problem.  Those firmware revisions have other
NCQ-related problems, including buggy NCQ error reporting and
occasional lockups after NCQ errors.

Treat spurious NCQ completions as an HSM violation and freeze the port
when one occurs.  EH will turn off NCQ after this happens several
times.  Eventually, drives which show this behavior should be
blacklisted for NCQ.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
drivers/ata/ahci.c

index 6a3543e..334f54c 100644 (file)
@@ -198,7 +198,6 @@ struct ahci_port_priv {
        void                    *rx_fis;
        dma_addr_t              rx_fis_dma;
        /* for NCQ spurious interrupt analysis */
-       int                     ncq_saw_spurious_sdb_cnt;
        unsigned int            ncq_saw_d2h:1;
        unsigned int            ncq_saw_dmas:1;
 };
@@ -1160,23 +1159,24 @@ static void ahci_host_intr(struct ata_port *ap)
                known_irq = 1;
        }
 
-       if (status & PORT_IRQ_SDB_FIS &&
-                  pp->ncq_saw_spurious_sdb_cnt < 10) {
+       if (status & PORT_IRQ_SDB_FIS) {
                /* SDB FIS containing spurious completions might be
-                * dangerous, we need to know more about them.  Print
-                * more of it.
-                */
+                * dangerous, whine and fail commands with HSM
+                * violation.  EH will turn off NCQ after several such
+                * failures.
+                */
                const __le32 *f = pp->rx_fis + RX_FIS_SDB;
 
-               ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
-                               "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
-                               readl(port_mmio + PORT_CMD_ISSUE),
-                               readl(port_mmio + PORT_SCR_ACT),
-                               le32_to_cpu(f[0]), le32_to_cpu(f[1]),
-                               pp->ncq_saw_spurious_sdb_cnt < 10 ?
-                               "" : ", shutting up");
+               ata_ehi_push_desc(ehi, "spurious completion during NCQ "
+                                 "issue=0x%x SAct=0x%x FIS=%08x:%08x",
+                                 readl(port_mmio + PORT_CMD_ISSUE),
+                                 readl(port_mmio + PORT_SCR_ACT),
+                                 le32_to_cpu(f[0]), le32_to_cpu(f[1]));
+
+               ehi->err_mask |= AC_ERR_HSM;
+               ehi->action |= ATA_EH_SOFTRESET;
+               ata_port_freeze(ap);
 
-               pp->ncq_saw_spurious_sdb_cnt++;
                known_irq = 1;
        }