Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[powerpc.git] / arch / i386 / kernel / io_apic.c
index fd0df75..1b623cd 100644 (file)
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -38,6 +43,8 @@
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -86,14 +93,45 @@ static struct irq_pin_list {
        int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector)  \
-       (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector)  (vector)
-#endif
+struct io_apic {
+       unsigned int index;
+       unsigned int unused[3];
+       unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
 
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
 
 union entry_union {
        struct { u32 w1, w2; };
@@ -111,11 +149,39 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
        return eu.entry;
 }
 
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-       unsigned long flags;
        union entry_union eu;
        eu.entry = e;
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __ioapic_write_entry(apic, pin, e);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+       unsigned long flags;
+       union entry_union eu = { .entry.mask = 1 };
+
        spin_lock_irqsave(&ioapic_lock, flags);
        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
@@ -238,9 +304,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
        /*
         * Disable it in the IO-APIC irq-routing table:
         */
-       memset(&entry, 0, sizeof(entry));
-       entry.mask = 1;
-       ioapic_write_entry(apic, pin, entry);
+       ioapic_mask_entry(apic, pin);
 }
 
 static void clear_IO_APIC (void)
@@ -280,7 +344,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
                        break;
                entry = irq_2_pin + entry->next;
        }
-       set_irq_info(irq, cpumask);
+       irq_desc[irq].affinity = cpumask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -419,8 +483,8 @@ static void do_irq_balance(void)
                package_index = CPU_TO_PACKAGEINDEX(i);
                for (j = 0; j < NR_IRQS; j++) {
                        unsigned long value_now, delta;
-                       /* Is this an active IRQ? */
-                       if (!irq_desc[j].action)
+                       /* Is this an active IRQ or balancing disabled ? */
+                       if (!irq_desc[j].action || irq_balancing_disabled(j))
                                continue;
                        if ( package_index == i )
                                IRQ_DELTA(package_index,j) = 0;
@@ -598,8 +662,6 @@ static int balanced_irq(void *unused)
        unsigned long prev_balance_time = jiffies;
        long time_remaining = balanced_irq_interval;
 
-       daemonize("kirqd");
-       
        /* push everything to CPU 0 to give us a starting point.  */
        for (i = 0 ; i < NR_IRQS ; i++) {
                irq_desc[i].pending_mask = cpumask_of_cpu(0);
@@ -659,10 +721,9 @@ static int __init balanced_irq_init(void)
        }
        
        printk(KERN_INFO "Starting balanced_irq\n");
-       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+       if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
                return 0;
-       else 
-               printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+       printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
        for_each_possible_cpu(i) {
                kfree(irq_cpu_data[i].irq_delta);
@@ -673,7 +734,7 @@ failed:
        return 0;
 }
 
-int __init irqbalance_disable(char *str)
+int __devinit irqbalance_disable(char *str)
 {
        irqbalance_disabled = 1;
        return 1;
@@ -780,8 +841,7 @@ static int __init find_isa_irq_pin(int irq, int type)
 
                if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
                     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
                    ) &&
                    (mp_irqs[i].mpc_irqtype == type) &&
                    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -800,8 +860,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 
                if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
                     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
                    ) &&
                    (mp_irqs[i].mpc_irqtype == type) &&
                    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -931,12 +990,6 @@ static int EISA_ELCR(unsigned int irq)
 #define default_MCA_trigger(idx)       (1)
 #define default_MCA_polarity(idx)      (0)
 
-/* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_NEC98_trigger(idx)     (0)
-#define default_NEC98_polarity(idx)    (0)
-
 static int __init MPBIOS_polarity(int idx)
 {
        int bus = mp_irqs[idx].mpc_srcbus;
@@ -971,11 +1024,6 @@ static int __init MPBIOS_polarity(int idx)
                                        polarity = default_MCA_polarity(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       polarity = default_NEC98_polarity(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -1045,11 +1093,6 @@ static int MPBIOS_trigger(int idx)
                                        trigger = default_MCA_trigger(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       trigger = default_NEC98_trigger(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -1111,7 +1154,6 @@ static int pin_2_irq(int idx, int apic, int pin)
                case MP_BUS_ISA: /* ISA pin */
                case MP_BUS_EISA:
                case MP_BUS_MCA:
-               case MP_BUS_NEC98:
                {
                        irq = mp_irqs[idx].mpc_srcbusirq;
                        break;
@@ -1179,48 +1221,53 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
-int assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq)
 {
-       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-       unsigned long flags;
-       int vector;
+       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       int vector, offset, i;
 
-       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+       BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
-       spin_lock_irqsave(&vector_lock, flags);
+       if (irq_vector[irq] > 0)
+               return irq_vector[irq];
 
-       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
-               spin_unlock_irqrestore(&vector_lock, flags);
-               return IO_APIC_VECTOR(irq);
-       }
+       vector = current_vector;
+       offset = current_offset;
 next:
-       current_vector += 8;
-       if (current_vector == SYSCALL_VECTOR)
+       vector += 8;
+       if (vector >= FIRST_SYSTEM_VECTOR) {
+               offset = (offset + 1) % 8;
+               vector = FIRST_DEVICE_VECTOR + offset;
+       }
+       if (vector == current_vector)
+               return -ENOSPC;
+       if (vector == SYSCALL_VECTOR)
                goto next;
+       for (i = 0; i < NR_IRQ_VECTORS; i++)
+               if (irq_vector[i] == vector)
+                       goto next;
 
-       if (current_vector >= FIRST_SYSTEM_VECTOR) {
-               offset++;
-               if (!(offset%8)) {
-                       spin_unlock_irqrestore(&vector_lock, flags);
-                       return -ENOSPC;
-               }
-               current_vector = FIRST_DEVICE_VECTOR + offset;
-       }
+       current_vector = vector;
+       current_offset = offset;
+       irq_vector[irq] = vector;
 
-       vector = current_vector;
-       vector_irq[vector] = irq;
-       if (irq != AUTO_ASSIGN)
-               IO_APIC_VECTOR(irq) = vector;
+       return vector;
+}
 
+static int assign_irq_vector(int irq)
+{
+       unsigned long flags;
+       int vector;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       vector = __assign_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
 
        return vector;
 }
-
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+static struct irq_chip ioapic_chip;
 
 #define IOAPIC_AUTO    -1
 #define IOAPIC_EDGE    0
@@ -1228,16 +1275,14 @@ static struct hw_interrupt_type ioapic_edge_type;
 
 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
-       unsigned idx;
-
-       idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
                        trigger == IOAPIC_LEVEL)
-               irq_desc[idx].chip = &ioapic_level_type;
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                        handle_fasteoi_irq, "fasteoi");
        else
-               irq_desc[idx].chip = &ioapic_edge_type;
-       set_intr_gate(vector, interrupt[idx]);
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                        handle_edge_irq, "edge");
+       set_intr_gate(vector, interrupt[irq]);
 }
 
 static void __init setup_IO_APIC_irqs(void)
@@ -1305,9 +1350,8 @@ static void __init setup_IO_APIC_irqs(void)
                        if (!apic && (irq < 16))
                                disable_8259A_irq(irq);
                }
-               ioapic_write_entry(apic, pin, entry);
                spin_lock_irqsave(&ioapic_lock, flags);
-               set_native_irq_info(irq, TARGET_CPUS);
+               __ioapic_write_entry(apic, pin, entry);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
        }
@@ -1346,7 +1390,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
         * The timer IRQ doesn't have to know that behind the
         * scene we have a 8259A-master in AEOI mode ...
         */
-       irq_desc[0].chip = &ioapic_edge_type;
+       irq_desc[0].chip = &ioapic_chip;
+       set_irq_handler(0, handle_edge_irq);
 
        /*
         * Add it to the IO-APIC irq-routing table:
@@ -1356,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
        enable_8259A_irq(0);
 }
 
-static inline void UNEXPECTED_IO_APIC(void)
-{
-}
-
 void __init print_IO_APIC(void)
 {
        int apic, i;
@@ -1399,34 +1440,12 @@ void __init print_IO_APIC(void)
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-       if (reg_00.bits.ID >= get_physical_broadcast())
-               UNEXPECTED_IO_APIC();
-       if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
-               UNEXPECTED_IO_APIC();
 
        printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
-       if (    (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
-               (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
-               (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
-               (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
-               (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
-               (reg_01.bits.entries != 0x2E) &&
-               (reg_01.bits.entries != 0x3F)
-       )
-               UNEXPECTED_IO_APIC();
 
        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
        printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
-       if (    (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
-               (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
-               (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
-               (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
-               (reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
-       )
-               UNEXPECTED_IO_APIC();
-       if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
-               UNEXPECTED_IO_APIC();
 
        /*
         * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1436,8 +1455,6 @@ void __init print_IO_APIC(void)
        if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
-               if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
-                       UNEXPECTED_IO_APIC();
        }
 
        /*
@@ -1449,8 +1466,6 @@ void __init print_IO_APIC(void)
            reg_03.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
                printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
-               if (reg_03.bits.__reserved_1)
-                       UNEXPECTED_IO_APIC();
        }
 
        printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1481,17 +1496,12 @@ void __init print_IO_APIC(void)
                );
        }
        }
-       if (use_pci_vector())
-               printk(KERN_INFO "Using vector-based indexing\n");
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
        for (i = 0; i < NR_IRQS; i++) {
                struct irq_pin_list *entry = irq_2_pin + i;
                if (entry->pin < 0)
                        continue;
-               if (use_pci_vector() && !platform_legacy_irq(i))
-                       printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
-               else
-                       printk(KERN_DEBUG "IRQ%d ", i);
+               printk(KERN_DEBUG "IRQ%d ", i);
                for (;;) {
                        printk("-> %d:%d", entry->apic, entry->pin);
                        if (!entry->next)
@@ -1543,7 +1553,7 @@ void /*__init*/ print_local_APIC(void * dummy)
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
-       maxlvt = get_maxlvt();
+       maxlvt = lapic_get_maxlvt();
 
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
@@ -1875,6 +1885,15 @@ static void __init setup_ioapic_ids_from_mpc(void)
 static void __init setup_ioapic_ids_from_mpc(void) { }
 #endif
 
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1883,10 +1902,13 @@ static void __init setup_ioapic_ids_from_mpc(void) { }
  *     - if this function detects that timer IRQs are defunct, then we fall
  *       back to ISA timer IRQs
  */
-static int __init timer_irq_works(void)
+int __init timer_irq_works(void)
 {
        unsigned long t1 = jiffies;
 
+       if (no_timer_check)
+               return 1;
+
        local_irq_enable();
        /* Let ten ticks pass... */
        mdelay((10 * 1000) / HZ);
@@ -1918,6 +1940,8 @@ static int __init timer_irq_works(void)
  */
 
 /*
+ * Startup quirk:
+ *
  * Starting up a edge-triggered IO-APIC interrupt is
  * nasty - we need to make sure that we get the edge.
  * If it is already asserted for some reason, we need
@@ -1925,8 +1949,10 @@ static int __init timer_irq_works(void)
  *
  * This is not complete - we should be able to fake
  * an edge even if it isn't on the 8259A...
+ *
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
  */
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
 {
        int was_pending = 0;
        unsigned long flags;
@@ -1943,47 +1969,18 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
        return was_pending;
 }
 
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
+static void ack_ioapic_irq(unsigned int irq)
 {
-       move_irq(irq);
-       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
-                                       == (IRQ_PENDING | IRQ_DISABLED))
-               mask_IO_APIC_irq(irq);
+       move_native_irq(irq);
        ack_APIC_irq();
 }
 
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
-       unmask_IO_APIC_irq(irq);
-
-       return 0; /* don't check for pending */
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_quirk_irq(unsigned int irq)
 {
        unsigned long v;
        int i;
 
-       move_irq(irq);
+       move_native_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2003,7 +2000,7 @@ static void end_level_ioapic_irq (unsigned int irq)
  * operation to prevent an edge-triggered interrupt escaping meanwhile.
  * The idea is from Manfred Spraul.  --macro
  */
-       i = IO_APIC_VECTOR(irq);
+       i = irq_vector[irq];
 
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
@@ -2018,105 +2015,26 @@ static void end_level_ioapic_irq (unsigned int irq)
        }
 }
 
-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       move_native_irq(vector);
-       ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       move_native_irq(vector);
-       end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
+static int ioapic_retrigger_irq(unsigned int irq)
 {
-       int irq = vector_to_irq(vector);
-
-       mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
-                                       cpumask_t cpu_mask)
-{
-       int irq = vector_to_irq(vector);
-
-       set_native_irq_info(vector, cpu_mask);
-       set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif
-#endif
-
-static int ioapic_retrigger(unsigned int irq)
-{
-       send_IPI_self(IO_APIC_VECTOR(irq));
+       send_IPI_self(irq_vector[irq]);
 
        return 1;
 }
 
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
-       .typename       = "IO-APIC-edge",
-       .startup        = startup_edge_ioapic,
-       .shutdown       = shutdown_edge_ioapic,
-       .enable         = enable_edge_ioapic,
-       .disable        = disable_edge_ioapic,
-       .ack            = ack_edge_ioapic,
-       .end            = end_edge_ioapic,
+static struct irq_chip ioapic_chip __read_mostly = {
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_ioapic_irq,
+       .eoi            = ack_ioapic_quirk_irq,
 #ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity,
+       .set_affinity   = set_ioapic_affinity_irq,
 #endif
-       .retrigger      = ioapic_retrigger,
+       .retrigger      = ioapic_retrigger_irq,
 };
 
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
-       .typename       = "IO-APIC-level",
-       .startup        = startup_level_ioapic,
-       .shutdown       = shutdown_level_ioapic,
-       .enable         = enable_level_ioapic,
-       .disable        = disable_level_ioapic,
-       .ack            = mask_and_ack_level_ioapic,
-       .end            = end_level_ioapic,
-#ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity,
-#endif
-       .retrigger      = ioapic_retrigger,
-};
 
 static inline void init_IO_APIC_traps(void)
 {
@@ -2135,12 +2053,7 @@ static inline void init_IO_APIC_traps(void)
         */
        for (irq = 0; irq < NR_IRQS ; irq++) {
                int tmp = irq;
-               if (use_pci_vector()) {
-                       if (!platform_legacy_irq(tmp))
-                               if ((tmp = vector_to_irq(tmp)) == -1)
-                                       continue;
-               }
-               if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+               if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -2150,20 +2063,21 @@ static inline void init_IO_APIC_traps(void)
                                make_8259A_irq(irq);
                        else
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_type;
+                               irq_desc[irq].chip = &no_irq_chip;
                }
        }
 }
 
-static void enable_lapic_irq (unsigned int irq)
-{
-       unsigned long v;
+/*
+ * The local APIC irq-chip implementation:
+ */
 
-       v = apic_read(APIC_LVT0);
-       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+static void ack_apic(unsigned int irq)
+{
+       ack_APIC_irq();
 }
 
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq (unsigned int irq)
 {
        unsigned long v;
 
@@ -2171,21 +2085,19 @@ static void disable_lapic_irq (unsigned int irq)
        apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq (unsigned int irq)
 {
-       ack_APIC_irq();
-}
+       unsigned long v;
 
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
 
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
-       .typename       = "local-APIC-edge",
-       .startup        = NULL, /* startup_irq() not used for IRQ0 */
-       .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
-       .enable         = enable_lapic_irq,
-       .disable        = disable_lapic_irq,
-       .ack            = ack_lapic_irq,
-       .end            = end_lapic_irq
+static struct irq_chip lapic_chip __read_mostly = {
+       .name           = "local-APIC-edge",
+       .mask           = mask_lapic_irq,
+       .unmask         = unmask_lapic_irq,
+       .eoi            = ack_apic,
 };
 
 static void setup_nmi (void)
@@ -2220,9 +2132,15 @@ static inline void unlock_ExtINT_logic(void)
        unsigned char save_control, save_freq_select;
 
        pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        apic = find_isa_irq_apic(8, mp_INT);
-       if (pin == -1)
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
                return;
+       }
 
        entry0 = ioapic_read_entry(apic, pin);
        clear_IO_APIC_pin(apic, pin);
@@ -2267,7 +2185,7 @@ int timer_uses_ioapic_pin_0;
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  */
-static inline void check_timer(void)
+static inline void __init check_timer(void)
 {
        int apic1, pin1, apic2, pin2;
        int vector;
@@ -2356,7 +2274,8 @@ static inline void check_timer(void)
        printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
        disable_8259A_irq(0);
-       irq_desc[0].chip = &lapic_irq_type;
+       set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
+                                     "fasteoi");
        apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
        enable_8259A_irq(0);
 
@@ -2531,6 +2450,245 @@ static int __init ioapic_init_sysfs(void)
 
 device_initcall(ioapic_init_sysfs);
 
+/*
+ * Dynamic irq allocate and deallocation
+ */
+int create_irq(void)
+{
+       /* Allocate an unused irq */
+       int irq, new, vector = 0;
+       unsigned long flags;
+
+       irq = -ENOSPC;
+       spin_lock_irqsave(&vector_lock, flags);
+       for (new = (NR_IRQS - 1); new >= 0; new--) {
+               if (platform_legacy_irq(new))
+                       continue;
+               if (irq_vector[new] != 0)
+                       continue;
+               vector = __assign_irq_vector(new);
+               if (likely(vector > 0))
+                       irq = new;
+               break;
+       }
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (irq >= 0) {
+               set_intr_gate(vector, interrupt[irq]);
+               dynamic_irq_init(irq);
+       }
+       return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+       unsigned long flags;
+
+       dynamic_irq_cleanup(irq);
+
+       spin_lock_irqsave(&vector_lock, flags);
+       irq_vector[irq] = 0;
+       spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * MSI mesage composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+       int vector;
+       unsigned dest;
+
+       vector = assign_irq_vector(irq);
+       if (vector >= 0) {
+               dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->address_lo =
+                       MSI_ADDR_BASE_LO |
+                       ((INT_DEST_MODE == 0) ?
+                               MSI_ADDR_DEST_MODE_PHYSICAL:
+                               MSI_ADDR_DEST_MODE_LOGICAL) |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               MSI_ADDR_REDIRECTION_CPU:
+                               MSI_ADDR_REDIRECTION_LOWPRI) |
+                       MSI_ADDR_DEST_ID(dest);
+
+               msg->data =
+                       MSI_DATA_TRIGGER_EDGE |
+                       MSI_DATA_LEVEL_ASSERT |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               MSI_DATA_DELIVERY_FIXED:
+                               MSI_DATA_DELIVERY_LOWPRI) |
+                       MSI_DATA_VECTOR(vector);
+       }
+       return vector;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct msi_msg msg;
+       unsigned int dest;
+       cpumask_t tmp;
+       int vector;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       vector = assign_irq_vector(irq);
+       if (vector < 0)
+               return;
+
+       dest = cpu_mask_to_apicid(mask);
+
+       read_msi_msg(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       write_msi_msg(irq, &msg);
+       irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+       .name           = "PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+       struct msi_msg msg;
+       int irq, ret;
+       irq = create_irq();
+       if (irq < 0)
+               return irq;
+
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0) {
+               destroy_irq(irq);
+               return ret;
+       }
+
+       set_irq_msi(irq, desc);
+       write_msi_msg(irq, &msg);
+
+       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
+                                     "edge");
+
+       return 0;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+       destroy_irq(irq);
+}
+
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest)
+{
+       struct ht_irq_msg msg;
+       fetch_ht_irq_msg(irq, &msg);
+
+       msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+       msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+       write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       unsigned int dest;
+       cpumask_t tmp;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(mask, tmp, CPU_MASK_ALL);
+
+       dest = cpu_mask_to_apicid(mask);
+
+       target_ht_irq(irq, dest);
+       irq_desc[irq].affinity = mask;
+}
+#endif
+
+static struct irq_chip ht_irq_chip = {
+       .name           = "PCI-HT",
+       .mask           = mask_ht_irq,
+       .unmask         = unmask_ht_irq,
+       .ack            = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ht_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+       int vector;
+
+       vector = assign_irq_vector(irq);
+       if (vector >= 0) {
+               struct ht_irq_msg msg;
+               unsigned dest;
+               cpumask_t tmp;
+
+               cpus_clear(tmp);
+               cpu_set(vector >> 8, tmp);
+               dest = cpu_mask_to_apicid(tmp);
+
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+               msg.address_lo =
+                       HT_IRQ_LOW_BASE |
+                       HT_IRQ_LOW_DEST_ID(dest) |
+                       HT_IRQ_LOW_VECTOR(vector) |
+                       ((INT_DEST_MODE == 0) ?
+                               HT_IRQ_LOW_DM_PHYSICAL :
+                               HT_IRQ_LOW_DM_LOGICAL) |
+                       HT_IRQ_LOW_RQEOI_EDGE |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               HT_IRQ_LOW_MT_FIXED :
+                               HT_IRQ_LOW_MT_ARBITRATED) |
+                       HT_IRQ_LOW_IRQ_MASKED;
+
+               write_ht_irq_msg(irq, &msg);
+
+               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+                                             handle_edge_irq, "edge");
+       }
+       return vector;
+}
+#endif /* CONFIG_HT_IRQ */
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -2682,9 +2840,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
        if (!ioapic && (irq < 16))
                disable_8259A_irq(irq);
 
-       ioapic_write_entry(ioapic, pin, entry);
        spin_lock_irqsave(&ioapic_lock, flags);
-       set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+       __ioapic_write_entry(ioapic, pin, entry);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return 0;