X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fplatforms%2Fpseries%2Feeh_driver.c;h=aaad2c0afcbf93868c581bf9d1f971bcbdc41794;hb=026477c1141b67e98e3bd8bdedb7d4b88a3ecd09;hp=b811d5ff92feea02696adabf0313c9c45a7c40f2;hpb=dc5cdd8ec146a60152f2264c8fe920ca5ecc71ea;p=powerpc.git diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index b811d5ff92..aaad2c0afc 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -23,9 +23,8 @@ * */ #include -#include #include -#include +#include #include #include #include @@ -176,7 +175,7 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) * * pSeries systems will isolate a PCI slot if the PCI-Host * bridge detects address or data parity errors, DMA's - * occuring to wild addresses (which usually happen due to + * occurring to wild addresses (which usually happen due to * bugs in device drivers or in PCI adapter firmware). * Slot isolations also occur if #SERR, #PERR or other misc * PCI-related errors are detected. @@ -202,7 +201,11 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) { - int rc; + int cnt, rc; + + /* pcibios will clear the counter; save the value */ + cnt = pe_dn->eeh_freeze_count; + if (bus) pcibios_remove_pci_devices(bus); @@ -241,6 +244,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) ssleep (5); pcibios_add_pci_devices(bus); } + pe_dn->eeh_freeze_count = cnt; return 0; } @@ -250,22 +254,29 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) */ #define MAX_WAIT_FOR_RECOVERY 15 -void handle_eeh_events (struct eeh_event *event) +struct pci_dn * handle_eeh_events (struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + const char *location, *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); frozen_bus = pcibios_find_pci_bus(frozen_dn); if (!frozen_dn) { - printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n", - pci_name(event->dev)); - return; + + location = (char *) get_property(event->dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; + printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " + "for location=%s pci addr=%s\n", + location, pci_name(event->dev)); + return NULL; } + location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device @@ -277,9 +288,10 @@ void handle_eeh_events (struct eeh_event *event) frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); if (!frozen_bus) { - printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", - frozen_dn->full_name); - return; + printk(KERN_ERR "EEH: Cannot find PCI bus " + "for location=%s dn=%s\n", + location, frozen_dn->full_name); + return NULL; } #if 0 @@ -291,9 +303,17 @@ void handle_eeh_events (struct eeh_event *event) frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; + + if (frozen_pdn->pcidev) { + pci_str = pci_name (frozen_pdn->pcidev); + drv_str = pcid_name (frozen_pdn->pcidev); + } else { + pci_str = pci_name (event->dev); + drv_str = pcid_name (event->dev); + } if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) - goto hard_fail; + goto excess_failures; /* If the reset state is a '5' and the time to reset is 0 (infinity) * or is more then 15 seconds, then mark this as a permanent failure. @@ -305,10 +325,9 @@ void handle_eeh_events (struct eeh_event *event) eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); printk(KERN_WARNING - "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", - frozen_pdn->eeh_freeze_count, - pci_name (frozen_pdn->pcidev), - pcid_name(frozen_pdn->pcidev)); + "EEH: This PCI device has failed %d times since last reboot: " + "location=%s driver=%s pci addr=%s\n", + frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -348,22 +367,29 @@ void handle_eeh_events (struct eeh_event *event) /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); - return; + return frozen_pdn; -hard_fail: +excess_failures: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ printk(KERN_ERR - "EEH: PCI device %s - %s has failed %d times \n" - "and has been permanently disabled. Please try reseating\n" - "this device or replacing it.\n", - pci_name (frozen_pdn->pcidev), - pcid_name(frozen_pdn->pcidev), - frozen_pdn->eeh_freeze_count); + "EEH: PCI device at location=%s driver=%s pci addr=%s \n" + "has failed %d times and has been permanently disabled. \n" + "Please try reseating this device or replacing it.\n", + location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); + goto perm_error; +hard_fail: + printk(KERN_ERR + "EEH: Unable to recover from failure of PCI device " + "at location=%s driver=%s pci addr=%s \n" + "Please try reseating this device or replacing it.\n", + location, drv_str, pci_str); + +perm_error: eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); /* Notify all devices that they're about to go down. */ @@ -371,6 +397,8 @@ hard_fail: /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); + + return NULL; } /* ---------- end of file ---------- */