Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
[powerpc.git] / arch / x86_64 / kernel / nmi.c
index 186aebb..dfab9f1 100644 (file)
@@ -39,15 +39,17 @@ int panic_on_unrecovered_nmi;
  *   different subsystems this reservation system just tries to coordinate
  *   things a little
  */
-static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
-static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
 
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
 #define NMI_MAX_COUNTER_BITS 66
+#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
+
+static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
 
 /* nmi_active:
  * >0: the lapic NMI watchdog is active, but can be disabled
@@ -108,71 +110,135 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 /* checks for a bit availability (hack for oprofile) */
 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
+       int cpu;
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-       return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+       for_each_possible_cpu (cpu) {
+               if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+                       return 0;
+       }
+       return 1;
 }
 
 /* checks the an msr for availability */
 int avail_to_resrv_perfctr_nmi(unsigned int msr)
 {
        unsigned int counter;
+       int cpu;
 
        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+       for_each_possible_cpu (cpu) {
+               if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
+                       return 0;
+       }
+       return 1;
 }
 
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
 {
        unsigned int counter;
+       if (cpu < 0)
+               cpu = smp_processor_id();
 
        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+       if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
                return 1;
        return 0;
 }
 
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
 {
        unsigned int counter;
+       if (cpu < 0)
+               cpu = smp_processor_id();
 
        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+       clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
 }
 
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+       int cpu, i;
+       for_each_possible_cpu (cpu) {
+               if (!__reserve_perfctr_nmi(cpu, msr)) {
+                       for_each_possible_cpu (i) {
+                               if (i >= cpu)
+                                       break;
+                               __release_perfctr_nmi(i, msr);
+                       }
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+       int cpu;
+       for_each_possible_cpu (cpu)
+               __release_perfctr_nmi(cpu, msr);
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
 {
        unsigned int counter;
+       if (cpu < 0)
+               cpu = smp_processor_id();
 
        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
+       if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
                return 1;
        return 0;
 }
 
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
 {
        unsigned int counter;
+       if (cpu < 0)
+               cpu = smp_processor_id();
 
        counter = nmi_evntsel_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-       clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
+       clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+       int cpu, i;
+       for_each_possible_cpu (cpu) {
+               if (!__reserve_evntsel_nmi(cpu, msr)) {
+                       for_each_possible_cpu (i) {
+                               if (i >= cpu)
+                                       break;
+                               __release_evntsel_nmi(i, msr);
+                       }
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+       int cpu;
+       for_each_possible_cpu (cpu) {
+               __release_evntsel_nmi(cpu, msr);
+       }
 }
 
 static __cpuinit inline int nmi_known_cpu(void)
 {
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
-               return boot_cpu_data.x86 == 15;
+               return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return 1;
@@ -187,10 +253,7 @@ void nmi_watchdog_default(void)
 {
        if (nmi_watchdog != NMI_DEFAULT)
                return;
-       if (nmi_known_cpu())
-               nmi_watchdog = NMI_LOCAL_APIC;
-       else
-               nmi_watchdog = NMI_IO_APIC;
+       nmi_watchdog = NMI_NONE;
 }
 
 static int endflag __initdata = 0;
@@ -214,6 +277,23 @@ static __init void nmi_cpu_busy(void *data)
 }
 #endif
 
+static unsigned int adjust_for_32bit_ctr(unsigned int hz)
+{
+       unsigned int retval = hz;
+
+       /*
+        * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+        * are writable, with higher bits sign extending from bit 31.
+        * So, we can only program the counter with 31 bit values and
+        * 32nd bit should be 1, for 33.. to be 1.
+        * Find the appropriate nmi_hz
+        */
+       if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
+               retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
+       }
+       return retval;
+}
+
 int __init check_nmi_watchdog (void)
 {
        int *counts;
@@ -239,7 +319,7 @@ int __init check_nmi_watchdog (void)
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                counts[cpu] = cpu_pda(cpu)->__nmi_count;
        local_irq_enable();
-       mdelay((10*1000)/nmi_hz); // wait 10 ticks
+       mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
        for_each_online_cpu(cpu) {
                if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
@@ -268,17 +348,8 @@ int __init check_nmi_watchdog (void)
                struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
                nmi_hz = 1;
-               /*
-                * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
-                * are writable, with higher bits sign extending from bit 31.
-                * So, we can only program the counter with 31 bit values and
-                * 32nd bit should be 1, for 33.. to be 1.
-                * Find the appropriate nmi_hz
-                */
-               if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
-                       ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
-                       nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
-               }
+               if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
+                       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        }
 
        kfree(counts);
@@ -302,8 +373,6 @@ int __init setup_nmi_watchdog(char *str)
        if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
                return 0;
 
-       if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
-               return 0;  /* no lapic support */
        nmi_watchdog = nmi;
        return 1;
 }
@@ -362,6 +431,33 @@ void enable_timer_nmi_watchdog(void)
        }
 }
 
+static void __acpi_nmi_disable(void *__unused)
+{
+       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+               on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+}
+
+static void __acpi_nmi_enable(void *__unused)
+{
+       apic_write(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+               on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+}
 #ifdef CONFIG_PM
 
 static int nmi_pm_active; /* nmi_active before suspend */
@@ -442,10 +538,10 @@ static int setup_k7_watchdog(void)
 
        perfctr_msr = MSR_K7_PERFCTR0;
        evntsel_msr = MSR_K7_EVNTSEL0;
-       if (!reserve_perfctr_nmi(perfctr_msr))
+       if (!__reserve_perfctr_nmi(-1, perfctr_msr))
                goto fail;
 
-       if (!reserve_evntsel_nmi(evntsel_msr))
+       if (!__reserve_evntsel_nmi(-1, evntsel_msr))
                goto fail1;
 
        /* Simulator may not support it */
@@ -471,9 +567,9 @@ static int setup_k7_watchdog(void)
        wd->check_bit = 1ULL<<63;
        return 1;
 fail2:
-       release_evntsel_nmi(evntsel_msr);
+       __release_evntsel_nmi(-1, evntsel_msr);
 fail1:
-       release_perfctr_nmi(perfctr_msr);
+       __release_perfctr_nmi(-1, perfctr_msr);
 fail:
        return 0;
 }
@@ -484,8 +580,8 @@ static void stop_k7_watchdog(void)
 
        wrmsr(wd->evntsel_msr, 0, 0);
 
-       release_evntsel_nmi(wd->evntsel_msr);
-       release_perfctr_nmi(wd->perfctr_msr);
+       __release_evntsel_nmi(-1, wd->evntsel_msr);
+       __release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 /* Note that these events don't tick when the CPU idles. This means
@@ -551,10 +647,10 @@ static int setup_p4_watchdog(void)
                cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
        }
 
-       if (!reserve_perfctr_nmi(perfctr_msr))
+       if (!__reserve_perfctr_nmi(-1, perfctr_msr))
                goto fail;
 
-       if (!reserve_evntsel_nmi(evntsel_msr))
+       if (!__reserve_evntsel_nmi(-1, evntsel_msr))
                goto fail1;
 
        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -579,7 +675,7 @@ static int setup_p4_watchdog(void)
        wd->check_bit = 1ULL<<39;
        return 1;
 fail1:
-       release_perfctr_nmi(perfctr_msr);
+       __release_perfctr_nmi(-1, perfctr_msr);
 fail:
        return 0;
 }
@@ -591,8 +687,8 @@ static void stop_p4_watchdog(void)
        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);
 
-       release_evntsel_nmi(wd->evntsel_msr);
-       release_perfctr_nmi(wd->perfctr_msr);
+       __release_evntsel_nmi(-1, wd->evntsel_msr);
+       __release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -620,10 +716,10 @@ static int setup_intel_arch_watchdog(void)
        perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
        evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-       if (!reserve_perfctr_nmi(perfctr_msr))
+       if (!__reserve_perfctr_nmi(-1, perfctr_msr))
                goto fail;
 
-       if (!reserve_evntsel_nmi(evntsel_msr))
+       if (!__reserve_evntsel_nmi(-1, evntsel_msr))
                goto fail1;
 
        wrmsrl(perfctr_msr, 0UL);
@@ -636,7 +732,9 @@ static int setup_intel_arch_watchdog(void)
 
        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
-       wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
+
+       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+       wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
 
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -648,7 +746,7 @@ static int setup_intel_arch_watchdog(void)
        wd->check_bit = 1ULL << (eax.split.bit_width - 1);
        return 1;
 fail1:
-       release_perfctr_nmi(perfctr_msr);
+       __release_perfctr_nmi(-1, perfctr_msr);
 fail:
        return 0;
 }
@@ -672,8 +770,8 @@ static void stop_intel_arch_watchdog(void)
 
        wrmsr(wd->evntsel_msr, 0, 0);
 
-       release_evntsel_nmi(wd->evntsel_msr);
-       release_perfctr_nmi(wd->perfctr_msr);
+       __release_evntsel_nmi(-1, wd->evntsel_msr);
+       __release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 void setup_apic_nmi_watchdog(void *unused)
@@ -857,15 +955,23 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
                                dummy &= ~P4_CCCR_OVF;
                                wrmsrl(wd->cccr_msr, dummy);
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
+                               /* start the cycle over again */
+                               wrmsrl(wd->perfctr_msr,
+                                      -((u64)cpu_khz * 1000 / nmi_hz));
                        } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
                                /*
                                 * ArchPerfom/Core Duo needs to re-unmask
                                 * the apic vector
                                 */
                                apic_write(APIC_LVTPC, APIC_DM_NMI);
+                               /* ARCH_PERFMON has 32 bit counter writes */
+                               wrmsr(wd->perfctr_msr,
+                                    (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
+                       } else {
+                               /* start the cycle over again */
+                               wrmsrl(wd->perfctr_msr,
+                                      -((u64)cpu_khz * 1000 / nmi_hz));
                        }
-                       /* start the cycle over again */
-                       wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
                        rc = 1;
                } else  if (nmi_watchdog == NMI_IO_APIC) {
                        /* don't know how to accurately check for this.