Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[powerpc.git] / arch / i386 / kernel / io_apic.c
index 350192d..1b623cd 100644 (file)
@@ -34,6 +34,8 @@
 #include <linux/pci.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -91,6 +93,46 @@ static struct irq_pin_list {
        int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
+struct io_apic {
+       unsigned int index;
+       unsigned int unused[3];
+       unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
 union entry_union {
        struct { u32 w1, w2; };
        struct IO_APIC_route_entry entry;
@@ -107,11 +149,39 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
        return eu.entry;
 }
 
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-       unsigned long flags;
        union entry_union eu;
        eu.entry = e;
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __ioapic_write_entry(apic, pin, e);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+       unsigned long flags;
+       union entry_union eu = { .entry.mask = 1 };
+
        spin_lock_irqsave(&ioapic_lock, flags);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
@@ -234,9 +304,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
        /*
         * Disable it in the IO-APIC irq-routing table:
         */
-       memset(&entry, 0, sizeof(entry));
-       entry.mask = 1;
-       ioapic_write_entry(apic, pin, entry);
+       ioapic_mask_entry(apic, pin);
 }
 
 static void clear_IO_APIC (void)
@@ -276,7 +344,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
                        break;
                entry = irq_2_pin + entry->next;
        }
-       set_native_irq_info(irq, cpumask);
+       irq_desc[irq].affinity = cpumask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -415,8 +483,8 @@ static void do_irq_balance(void)
                package_index = CPU_TO_PACKAGEINDEX(i);
                for (j = 0; j < NR_IRQS; j++) {
                        unsigned long value_now, delta;
-                       /* Is this an active IRQ? */
-                       if (!irq_desc[j].action)
+                       /* Is this an active IRQ or balancing disabled ? */
+                       if (!irq_desc[j].action || irq_balancing_disabled(j))
                                continue;
                        if ( package_index == i )
                                IRQ_DELTA(package_index,j) = 0;
@@ -594,8 +662,6 @@ static int balanced_irq(void *unused)
        unsigned long prev_balance_time = jiffies;
        long time_remaining = balanced_irq_interval;
 
-       daemonize("kirqd");
-       
        /* push everything to CPU 0 to give us a starting point.  */
        for (i = 0 ; i < NR_IRQS ; i++) {
                irq_desc[i].pending_mask = cpumask_of_cpu(0);
@@ -655,10 +721,9 @@ static int __init balanced_irq_init(void)
        }
        
        printk(KERN_INFO "Starting balanced_irq\n");
-       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+       if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
                return 0;
-       else 
-               printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+       printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
        for_each_possible_cpu(i) {
                kfree(irq_cpu_data[i].irq_delta);
@@ -669,7 +734,7 @@ failed:
        return 0;
 }
 
-int __init irqbalance_disable(char *str)
+int __devinit irqbalance_disable(char *str)
 {
        irqbalance_disabled = 1;
        return 1;
@@ -776,8 +841,7 @@ static int __init find_isa_irq_pin(int irq, int type)
 
                if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
                     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
                    ) &&
                    (mp_irqs[i].mpc_irqtype == type) &&
                    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -796,8 +860,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 
                if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
                     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
                    ) &&
                    (mp_irqs[i].mpc_irqtype == type) &&
                    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -927,12 +990,6 @@ static int EISA_ELCR(unsigned int irq)
 #define default_MCA_trigger(idx)       (1)
 #define default_MCA_polarity(idx)      (0)
 
-/* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_NEC98_trigger(idx)     (0)
-#define default_NEC98_polarity(idx)    (0)
-
 static int __init MPBIOS_polarity(int idx)
 {
        int bus = mp_irqs[idx].mpc_srcbus;
@@ -967,11 +1024,6 @@ static int __init MPBIOS_polarity(int idx)
                                        polarity = default_MCA_polarity(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       polarity = default_NEC98_polarity(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -1041,11 +1093,6 @@ static int MPBIOS_trigger(int idx)
                                        trigger = default_MCA_trigger(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       trigger = default_NEC98_trigger(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -1107,7 +1154,6 @@ static int pin_2_irq(int idx, int apic, int pin)
                case MP_BUS_ISA: /* ISA pin */
                case MP_BUS_EISA:
                case MP_BUS_MCA:
-               case MP_BUS_NEC98:
                {
                        irq = mp_irqs[idx].mpc_srcbusirq;
                        break;
@@ -1175,30 +1221,36 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
 static int __assign_irq_vector(int irq)
 {
-       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-       int vector;
+       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       int vector, offset, i;
 
        BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
        if (irq_vector[irq] > 0)
                return irq_vector[irq];
 
-       current_vector += 8;
-       if (current_vector == SYSCALL_VECTOR)
-               current_vector += 8;
-
-       if (current_vector >= FIRST_SYSTEM_VECTOR) {
-               offset++;
-               if (!(offset % 8))
-                       return -ENOSPC;
-               current_vector = FIRST_DEVICE_VECTOR + offset;
-       }
-
        vector = current_vector;
+       offset = current_offset;
+next:
+       vector += 8;
+       if (vector >= FIRST_SYSTEM_VECTOR) {
+               offset = (offset + 1) % 8;
+               vector = FIRST_DEVICE_VECTOR + offset;
+       }
+       if (vector == current_vector)
+               return -ENOSPC;
+       if (vector == SYSCALL_VECTOR)
+               goto next;
+       for (i = 0; i < NR_IRQ_VECTORS; i++)
+               if (irq_vector[i] == vector)
+                       goto next;
+
+       current_vector = vector;
+       current_offset = offset;
        irq_vector[irq] = vector;
 
        return vector;
@@ -1298,9 +1350,8 @@ static void __init setup_IO_APIC_irqs(void)
                        if (!apic && (irq < 16))
                                disable_8259A_irq(irq);
                }
-               ioapic_write_entry(apic, pin, entry);
                spin_lock_irqsave(&ioapic_lock, flags);
-               set_native_irq_info(irq, TARGET_CPUS);
+               __ioapic_write_entry(apic, pin, entry);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
        }
@@ -1350,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
        enable_8259A_irq(0);
 }
 
-static inline void UNEXPECTED_IO_APIC(void)
-{
-}
-
 void __init print_IO_APIC(void)
 {
        int apic, i;
@@ -1393,34 +1440,12 @@ void __init print_IO_APIC(void)
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-       if (reg_00.bits.ID >= get_physical_broadcast())
-               UNEXPECTED_IO_APIC();
-       if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
-               UNEXPECTED_IO_APIC();
 
        printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-       if (    (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
-               (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
-               (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
-               (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
-               (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
-               (reg_01.bits.entries != 0x2E) &&
-               (reg_01.bits.entries != 0x3F)
-       )
-               UNEXPECTED_IO_APIC();
 
        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
        printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-       if (    (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
-               (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
-               (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
-               (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
-               (reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
-       )
-               UNEXPECTED_IO_APIC();
-       if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
-               UNEXPECTED_IO_APIC();
 
        /*
         * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1430,8 +1455,6 @@ void __init print_IO_APIC(void)
        if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-               if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
-                       UNEXPECTED_IO_APIC();
        }
 
        /*
@@ -1443,8 +1466,6 @@ void __init print_IO_APIC(void)
            reg_03.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
                printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-               if (reg_03.bits.__reserved_1)
-                       UNEXPECTED_IO_APIC();
        }
 
        printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1532,7 +1553,7 @@ void /*__init*/ print_local_APIC(void * dummy)
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
-       maxlvt = get_maxlvt();
+       maxlvt = lapic_get_maxlvt();
 
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
@@ -1864,6 +1885,15 @@ static void __init setup_ioapic_ids_from_mpc(void)
 static void __init setup_ioapic_ids_from_mpc(void) { }
 #endif
 
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1872,10 +1902,13 @@ static void __init setup_ioapic_ids_from_mpc(void) { }
  *     - if this function detects that timer IRQs are defunct, then we fall
  *       back to ISA timer IRQs
  */
-static int __init timer_irq_works(void)
+int __init timer_irq_works(void)
 {
        unsigned long t1 = jiffies;
 
+       if (no_timer_check)
+               return 1;
+
        local_irq_enable();
        /* Let ten ticks pass... */
        mdelay((10 * 1000) / HZ);
@@ -2099,9 +2132,15 @@ static inline void unlock_ExtINT_logic(void)
        unsigned char save_control, save_freq_select;
 
        pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        apic = find_isa_irq_apic(8, mp_INT);
-       if (pin == -1)
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
                return;
+       }
 
        entry0 = ioapic_read_entry(apic, pin);
        clear_IO_APIC_pin(apic, pin);
@@ -2146,7 +2185,7 @@ int timer_uses_ioapic_pin_0;
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  */
-static inline void check_timer(void)
+static inline void __init check_timer(void)
 {
        int apic1, pin1, apic2, pin2;
        int vector;
@@ -2236,7 +2275,7 @@ static inline void check_timer(void)
 
        disable_8259A_irq(0);
        set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
-                                     "fasteio");
+                                     "fasteoi");
        apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
        enable_8259A_irq(0);
 
@@ -2417,7 +2456,7 @@ device_initcall(ioapic_init_sysfs);
 int create_irq(void)
 {
        /* Allocate an unused irq */
-       int irq, new, vector;
+       int irq, new, vector = 0;
        unsigned long flags;
 
        irq = -ENOSPC;
@@ -2513,7 +2552,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       set_native_irq_info(irq, mask);
+       irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2532,14 +2571,21 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
        struct msi_msg msg;
-       int ret;
+       int irq, ret;
+       irq = create_irq();
+       if (irq < 0)
+               return irq;
+
        ret = msi_compose_msg(dev, irq, &msg);
-       if (ret < 0)
+       if (ret < 0) {
+               destroy_irq(irq);
                return ret;
+       }
 
+       set_irq_msi(irq, desc);
        write_msi_msg(irq, &msg);
 
        set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
@@ -2550,7 +2596,7 @@ int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
 
 void arch_teardown_msi_irq(unsigned int irq)
 {
-       return;
+       destroy_irq(irq);
 }
 
 #endif /* CONFIG_PCI_MSI */
@@ -2564,18 +2610,16 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 static void target_ht_irq(unsigned int irq, unsigned int dest)
 {
-       u32 low, high;
-       low  = read_ht_irq_low(irq);
-       high = read_ht_irq_high(irq);
+       struct ht_irq_msg msg;
+       fetch_ht_irq_msg(irq, &msg);
 
-       low  &= ~(HT_IRQ_LOW_DEST_ID_MASK);
-       high &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+       msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
 
-       low  |= HT_IRQ_LOW_DEST_ID(dest);
-       high |= HT_IRQ_HIGH_DEST_ID(dest);
+       msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
 
-       write_ht_irq_low(irq, low);
-       write_ht_irq_high(irq, high);
+       write_ht_irq_msg(irq, &msg);
 }
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
@@ -2592,7 +2636,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        dest = cpu_mask_to_apicid(mask);
 
        target_ht_irq(irq, dest);
-       set_native_irq_info(irq, mask);
+       irq_desc[irq].affinity = mask;
 }
 #endif
 
@@ -2613,7 +2657,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
        vector = assign_irq_vector(irq);
        if (vector >= 0) {
-               u32 low, high;
+               struct ht_irq_msg msg;
                unsigned dest;
                cpumask_t tmp;
 
@@ -2621,9 +2665,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                cpu_set(vector >> 8, tmp);
                dest = cpu_mask_to_apicid(tmp);
 
-               high =  HT_IRQ_HIGH_DEST_ID(dest);
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
-               low =   HT_IRQ_LOW_BASE |
+               msg.address_lo =
+                       HT_IRQ_LOW_BASE |
                        HT_IRQ_LOW_DEST_ID(dest) |
                        HT_IRQ_LOW_VECTOR(vector) |
                        ((INT_DEST_MODE == 0) ?
@@ -2635,8 +2680,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                                HT_IRQ_LOW_MT_ARBITRATED) |
                        HT_IRQ_LOW_IRQ_MASKED;
 
-               write_ht_irq_low(irq, low);
-               write_ht_irq_high(irq, high);
+               write_ht_irq_msg(irq, &msg);
 
                set_irq_chip_and_handler_name(irq, &ht_irq_chip,
                                              handle_edge_irq, "edge");
@@ -2796,9 +2840,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
        if (!ioapic && (irq < 16))
                disable_8259A_irq(irq);
 
-       ioapic_write_entry(ioapic, pin, entry);
        spin_lock_irqsave(&ioapic_lock, flags);
-       set_native_irq_info(irq, TARGET_CPUS);
+       __ioapic_write_entry(ioapic, pin, entry);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return 0;