[PATCH] IRQ: prevent enabling of previously disabled interrupt
diff --git a/kernel/timer.c b/kernel/timer.c
index fd74268..13fa72c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -33,6 +33,7 @@
 #include <linux/posix-timers.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -85,7 +86,8 @@ struct tvec_t_base_s {
 } ____cacheline_aligned_in_smp;
 
 typedef struct tvec_t_base_s tvec_base_t;
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
+static tvec_base_t boot_tvec_bases;
 
 static inline void set_running_timer(tvec_base_t *base,
                                        struct timer_list *timer)
@@ -156,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base);
 void fastcall init_timer(struct timer_list *timer)
 {
        timer->entry.next = NULL;
-       timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+       timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
 }
 EXPORT_SYMBOL(init_timer);
 
@@ -217,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
                ret = 1;
        }
 
-       new_base = &__get_cpu_var(tvec_bases);
+       new_base = __get_cpu_var(tvec_bases);
 
        if (base != &new_base->t_base) {
                /*
@@ -257,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-       tvec_base_t *base = &per_cpu(tvec_bases, cpu);
+       tvec_base_t *base = per_cpu(tvec_bases, cpu);
        unsigned long flags;
 
        BUG_ON(timer_pending(timer) || !timer->function);
@@ -488,13 +490,25 @@ unsigned long next_timer_interrupt(void)
        struct list_head *list;
        struct timer_list *nte;
        unsigned long expires;
+       unsigned long hr_expires = MAX_JIFFY_OFFSET;
+       ktime_t hr_delta;
        tvec_t *varray[4];
        int i, j;
 
-       base = &__get_cpu_var(tvec_bases);
+       hr_delta = hrtimer_get_next_event();
+       if (hr_delta.tv64 != KTIME_MAX) {
+               struct timespec tsdelta;
+               tsdelta = ktime_to_timespec(hr_delta);
+               hr_expires = timespec_to_jiffies(&tsdelta);
+               if (hr_expires < 3)
+                       return hr_expires + jiffies;
+       }
+       hr_expires += jiffies;
+
+       base = __get_cpu_var(tvec_bases);
        spin_lock(&base->t_base.lock);
        expires = base->timer_jiffies + (LONG_MAX >> 1);
-       list = 0;
+       list = NULL;
 
        /* Look for timer events in tv1. */
        j = base->timer_jiffies & TVR_MASK;
@@ -541,6 +555,10 @@ found:
                }
        }
        spin_unlock(&base->t_base.lock);
+
+       if (time_before(hr_expires, expires))
+               return hr_expires;
+
        return expires;
 }
 #endif
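
The hunk above teaches next_timer_interrupt() about the hrtimer queue: an
event due within a couple of ticks is returned immediately (scanning the
wheel would cost more than it saves), otherwise the earlier of the hrtimer
and timer-wheel expiries wins.  A standalone sketch of that selection
logic, with MAX_JIFFY_OFFSET and the jiffies values as simplified
stand-ins for the kernel's definitions:

    /* Toy model of the hrtimer-aware expiry selection above. */
    #include <stdio.h>

    #define MAX_JIFFY_OFFSET (~0UL >> 1)

    /* wraparound-safe compare, as the kernel's time_before() */
    static int time_before(unsigned long a, unsigned long b)
    {
            return (long)(a - b) < 0;
    }

    static unsigned long next_event(unsigned long jiffies_now,
                                    unsigned long wheel_expires,
                                    unsigned long hr_delta_jiffies)
    {
            unsigned long hr_expires = MAX_JIFFY_OFFSET;

            if (hr_delta_jiffies != MAX_JIFFY_OFFSET) {
                    hr_expires = hr_delta_jiffies;
                    /* an hrtimer due within ~2 ticks wins outright */
                    if (hr_expires < 3)
                            return hr_expires + jiffies_now;
            }
            hr_expires += jiffies_now;

            return time_before(hr_expires, wheel_expires) ?
                    hr_expires : wheel_expires;
    }

    int main(void)
    {
            /* hrtimer due in 2 jiffies beats a wheel timer due in 50 */
            printf("%lu\n", next_event(1000, 1050, 2));  /* 1002 */
            /* a near wheel timer beats a distant hrtimer */
            printf("%lu\n", next_event(1000, 1010, 40)); /* 1010 */
            return 0;
    }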
@@ -716,12 +734,16 @@ static void second_overflow(void)
 #endif
 }
 
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
 {
-       long time_adjust_step, delta_nsec;
+       long time_adjust_step;
 
-       if ((time_adjust_step = time_adjust) != 0 ) {
+       time_adjust_step = time_adjust;
+       if (time_adjust_step) {
                /*
                 * We are doing an adjtime thing.  Prepare time_adjust_step to
                 * be within bounds.  Note that a positive time_adjust means we
@@ -732,10 +754,19 @@ static void update_wall_time_one_tick(void)
                 */
                time_adjust_step = min(time_adjust_step, (long)tickadj);
                time_adjust_step = max(time_adjust_step, (long)-tickadj);
+       }
+       return time_adjust_step;
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+       long time_adjust_step, delta_nsec;
 
+       time_adjust_step = adjtime_adjustment();
+       if (time_adjust_step)
                /* Reduce by this step the amount of time left  */
                time_adjust -= time_adjust_step;
-       }
        delta_nsec = tick_nsec + time_adjust_step * 1000;
        /*
         * Advance the phase, once it gets to one microsecond, then
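
adjtime_adjustment() is split out above so the clamping logic can be
called without the side effect on time_adjust (current_tick_length() below
relies on that).  A minimal userspace model of the clamp, with tickadj and
time_adjust as stand-in globals:

    #include <stdio.h>

    static long tickadj = 500;      /* max slew per tick, microseconds */
    static long time_adjust;        /* outstanding adjtime() offset, usec */

    /* side-effect free: reports the step, does not consume it */
    static long adjtime_adjustment(void)
    {
            long step = time_adjust;

            /* never slew more than +/- tickadj per tick, so the clock
             * stays monotonic and close to its nominal rate */
            if (step > tickadj)
                    step = tickadj;
            if (step < -tickadj)
                    step = -tickadj;
            return step;
    }

    int main(void)
    {
            time_adjust = 1200;     /* as if adjtime() asked for +1.2 ms */
            while (time_adjust) {
                    long step = adjtime_adjustment();

                    time_adjust -= step;    /* the caller consumes it */
                    printf("applied %ld usec, %ld left\n", step, time_adjust);
            }
            return 0;               /* prints steps of 500, 500, 200 */
    }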
@@ -757,6 +788,22 @@ static void update_wall_time_one_tick(void)
        }
 }
 
+/*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+       long delta_nsec;
+
+       delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+       return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
 /*
  * Using a loop looks inefficient, but "ticks" is
  * usually just one (we shouldn't be losing ticks,
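
current_tick_length() reports the tick in fixed-point nanoseconds with
SHIFT_SCALE-10 bits of fraction, as its comment says.  A worked example of
that arithmetic, assuming SHIFT_SCALE is 22 as in <linux/timex.h> of this
era; the tick_nsec and time_adj values are purely illustrative:

    #include <stdio.h>
    #include <stdint.h>

    #define SHIFT_SCALE 22          /* so 22 - 10 = 12 fraction bits */

    int main(void)
    {
            long tick_nsec = 1000000;   /* 1 ms tick (HZ=1000) */
            long adjustment = 500;      /* +500 usec this tick from adjtime */
            int64_t time_adj = 0;       /* NTP phase term, ignored here */

            long delta_nsec = tick_nsec + adjustment * 1000;
            uint64_t len = ((uint64_t)delta_nsec << (SHIFT_SCALE - 10))
                            + time_adj;

            /* shifting the fraction bits back out recovers whole ns */
            printf("%llu fixed-point units = %llu ns\n",
                   (unsigned long long)len,
                   (unsigned long long)(len >> (SHIFT_SCALE - 10)));
            return 0;
    }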
@@ -855,8 +902,9 @@ EXPORT_SYMBOL(xtime_lock);
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-       tvec_base_t *base = &__get_cpu_var(tvec_bases);
+       tvec_base_t *base = __get_cpu_var(tvec_bases);
 
+       hrtimer_run_queues();
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
 }
@@ -867,6 +915,7 @@ static void run_timer_softirq(struct softirq_action *h)
 void run_local_timers(void)
 {
        raise_softirq(TIMER_SOFTIRQ);
+       softlockup_tick();
 }
 
 /*
@@ -894,8 +943,9 @@ static inline void update_times(void)
 void do_timer(struct pt_regs *regs)
 {
        jiffies_64++;
+       /* prevent loading jiffies before storing new jiffies_64 value. */
+       barrier();
        update_times();
-       softlockup_tick(regs);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
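
The barrier() added to do_timer() keeps the compiler from reordering a
load of jiffies (on 32-bit, the low word of jiffies_64 reached through a
linker alias) above the jiffies_64 increment.  A compilable sketch of the
mechanism; the alias is modelled with a macro here, so this only shows
where the barrier sits, not the 32-bit tearing it guards against:

    #include <stdio.h>

    /* exactly what the kernel's barrier() expands to */
    #define barrier() __asm__ __volatile__("" ::: "memory")

    static unsigned long long jiffies_64;
    #define jiffies ((unsigned long)jiffies_64) /* stand-in for the alias */

    static void update_times(void)
    {
            /* must observe the increment done by the caller */
            printf("tick %lu\n", jiffies);
    }

    static void do_timer_tick(void)
    {
            jiffies_64++;
            barrier();      /* store jiffies_64 before any jiffies load */
            update_times();
    }

    int main(void)
    {
            do_timer_tick();
            do_timer_tick();
            return 0;
    }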
@@ -906,19 +956,7 @@ void do_timer(struct pt_regs *regs)
  */
 asmlinkage unsigned long sys_alarm(unsigned int seconds)
 {
-       struct itimerval it_new, it_old;
-       unsigned int oldalarm;
-
-       it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
-       it_new.it_value.tv_sec = seconds;
-       it_new.it_value.tv_usec = 0;
-       do_setitimer(ITIMER_REAL, &it_new, &it_old);
-       oldalarm = it_old.it_value.tv_sec;
-       /* ehhh.. We can't return 0 if we have an alarm pending.. */
-       /* And we'd better return too much than too little anyway */
-       if ((!oldalarm && it_old.it_value.tv_usec) || it_old.it_value.tv_usec >= 500000)
-               oldalarm++;
-       return oldalarm;
+       return alarm_setitimer(seconds);
 }
 
 #endif
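
sys_alarm() now delegates to alarm_setitimer(), which centralizes the
rounding rule the deleted lines implemented: never report zero while an
alarm is still pending, and round half a second or more up.  The rule in
isolation:

    #include <stdio.h>

    /* remaining (sec, usec) of the old alarm -> seconds to report */
    static unsigned int round_remaining(long sec, long usec)
    {
            unsigned int old = sec;

            /* can't return 0 with an alarm pending; better too much
             * than too little */
            if ((!old && usec) || usec >= 500000)
                    old++;
            return old;
    }

    int main(void)
    {
            printf("%u\n", round_remaining(0, 1));      /* 1, not 0 */
            printf("%u\n", round_remaining(3, 600000)); /* 4 */
            printf("%u\n", round_remaining(3, 400000)); /* 3 */
            return 0;
    }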
@@ -1118,62 +1156,6 @@ asmlinkage long sys_gettid(void)
        return current->pid;
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
-{
-       unsigned long expire = restart->arg0, now = jiffies;
-       struct timespec __user *rmtp = (struct timespec __user *) restart->arg1;
-       long ret;
-
-       /* Did it expire while we handled signals? */
-       if (!time_after(expire, now))
-               return 0;
-
-       expire = schedule_timeout_interruptible(expire - now);
-
-       ret = 0;
-       if (expire) {
-               struct timespec t;
-               jiffies_to_timespec(expire, &t);
-
-               ret = -ERESTART_RESTARTBLOCK;
-               if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
-                       ret = -EFAULT;
-               /* The 'restart' block is already filled in */
-       }
-       return ret;
-}
-
-asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
-{
-       struct timespec t;
-       unsigned long expire;
-       long ret;
-
-       if (copy_from_user(&t, rqtp, sizeof(t)))
-               return -EFAULT;
-
-       if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
-               return -EINVAL;
-
-       expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-       expire = schedule_timeout_interruptible(expire);
-
-       ret = 0;
-       if (expire) {
-               struct restart_block *restart;
-               jiffies_to_timespec(expire, &t);
-               if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
-                       return -EFAULT;
-
-               restart = &current_thread_info()->restart_block;
-               restart->fn = nanosleep_restart;
-               restart->arg0 = jiffies + expire;
-               restart->arg1 = (unsigned long) rmtp;
-               ret = -ERESTART_RESTARTBLOCK;
-       }
-       return ret;
-}
-
 /*
  * sys_sysinfo - fill in sysinfo struct
  */ 
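
The jiffies-based sys_nanosleep()/nanosleep_restart() pair goes away here
(an hrtimer-based implementation takes over elsewhere).  Worth noting in
the deleted code is the `+ (t.tv_sec || t.tv_nsec)`: any nonzero request
gets one extra jiffy, covering the partially elapsed current tick so the
sleep is never shorter than asked for.  A simplified model of that
round-up, with HZ and the conversion as stand-ins (the kernel's
timespec_to_jiffies additionally rounds partial ticks up):

    #include <stdio.h>

    #define HZ 250UL
    #define NSEC_PER_SEC 1000000000UL

    static unsigned long to_jiffies_min(long sec, long nsec)
    {
            unsigned long j = sec * HZ + nsec / (NSEC_PER_SEC / HZ);

            /* one extra jiffy: the current tick is partly over */
            return j + ((sec || nsec) ? 1 : 0);
    }

    int main(void)
    {
            printf("%lu\n", to_jiffies_min(0, 1000000)); /* 1, not 0 */
            printf("%lu\n", to_jiffies_min(1, 0));       /* 251 */
            return 0;
    }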
@@ -1263,12 +1245,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
        return 0;
 }
 
-static void __devinit init_timers_cpu(int cpu)
+static int __devinit init_timers_cpu(int cpu)
 {
        int j;
        tvec_base_t *base;
 
-       base = &per_cpu(tvec_bases, cpu);
+       base = per_cpu(tvec_bases, cpu);
+       if (!base) {
+               static char boot_done;
+
+               /*
+                * Cannot do allocation in init_timers as that runs before the
+                * allocator initializes (and would waste memory if there are
+                * more possible CPUs than will ever be installed/brought up).
+                */
+               if (boot_done) {
+                       base = kmalloc_node(sizeof(*base), GFP_KERNEL,
+                                               cpu_to_node(cpu));
+                       if (!base)
+                               return -ENOMEM;
+                       memset(base, 0, sizeof(*base));
+               } else {
+                       base = &boot_tvec_bases;
+                       boot_done = 1;
+               }
+               per_cpu(tvec_bases, cpu) = base;
+       }
        spin_lock_init(&base->t_base.lock);
        for (j = 0; j < TVN_SIZE; j++) {
                INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1280,6 +1282,7 @@ static void __devinit init_timers_cpu(int cpu)
                INIT_LIST_HEAD(base->tv1.vec + j);
 
        base->timer_jiffies = jiffies;
+       return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
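
init_timers_cpu() now sets the per-CPU base up on first use: the boot CPU
is initialized before the slab allocator runs, so it takes the static
boot_tvec_bases, while every later CPU gets a node-local allocation; this
also stops wasting memory on CPUs that are possible but never brought up.
The pattern in a self-contained model (NR_CPUS and the struct are
stand-ins):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 4

    struct tvec_base { long timer_jiffies; };

    static struct tvec_base *bases[NR_CPUS]; /* models the per-CPU ptr */
    static struct tvec_base boot_base;       /* models boot_tvec_bases */

    static int init_timers_cpu(int cpu)
    {
            static int boot_done;

            if (!bases[cpu]) {
                    if (boot_done) {
                            bases[cpu] = calloc(1, sizeof(*bases[cpu]));
                            if (!bases[cpu])
                                    return -1;  /* -ENOMEM in the kernel */
                    } else {
                            /* first call runs before the allocator is
                             * up: fall back to the static base */
                            bases[cpu] = &boot_base;
                            boot_done = 1;
                    }
            }
            return 0;
    }

    int main(void)
    {
            for (int cpu = 0; cpu < NR_CPUS; cpu++)
                    if (init_timers_cpu(cpu) == 0)
                            printf("cpu%d %s\n", cpu,
                                   bases[cpu] == &boot_base ?
                                   "static boot base" : "heap base");
            return 0;
    }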
@@ -1302,8 +1305,8 @@ static void __devinit migrate_timers(int cpu)
        int i;
 
        BUG_ON(cpu_online(cpu));
-       old_base = &per_cpu(tvec_bases, cpu);
-       new_base = &get_cpu_var(tvec_bases);
+       old_base = per_cpu(tvec_bases, cpu);
+       new_base = get_cpu_var(tvec_bases);
 
        local_irq_disable();
        spin_lock(&new_base->t_base.lock);
@@ -1333,7 +1336,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
        long cpu = (long)hcpu;
        switch(action) {
        case CPU_UP_PREPARE:
-               init_timers_cpu(cpu);
+               if (init_timers_cpu(cpu) < 0)
+                       return NOTIFY_BAD;
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
@@ -1361,8 +1365,8 @@ void __init init_timers(void)
 
 #ifdef CONFIG_TIME_INTERPOLATION
 
-struct time_interpolator *time_interpolator;
-static struct time_interpolator *time_interpolator_list;
+struct time_interpolator *time_interpolator __read_mostly;
+static struct time_interpolator *time_interpolator_list __read_mostly;
 static DEFINE_SPINLOCK(time_interpolator_lock);
 
 static inline u64 time_interpolator_get_cycles(unsigned int src)
@@ -1376,10 +1380,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
                        return x();
 
                case TIME_SOURCE_MMIO64 :
-                       return readq((void __iomem *) time_interpolator->addr);
+                       return readq_relaxed((void __iomem *)time_interpolator->addr);
 
                case TIME_SOURCE_MMIO32 :
-                       return readl((void __iomem *) time_interpolator->addr);
+                       return readl_relaxed((void __iomem *)time_interpolator->addr);
 
                default: return get_cycles();
        }