[PATCH] sched: assorted kernel/sched.c updates

This is a combined diff against kernel/sched.c. It backs out the
prio_bias ("smpnice") load biasing and the last_waker_cpu wakeup
heuristic, drops the __put_task_struct_cb() RCU callback and the
sleep_avg rise multiplier, flushes function-return probe instances
when a dead task goes through its final context switch, orders
double-runqueue locking by cpu number instead of runqueue address,
lets architectures preset the migration cost via
CONFIG_DEFAULT_MIGRATION_COST, suppresses cond_resched() until the
system is fully booted, initializes the idle task's timestamp, and
prefixes the "scheduling while atomic" and "sleeping function called
from invalid context" warnings with "BUG:".
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804..78acdef 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -49,6 +49,7 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/acct.h>
+#include <linux/kprobes.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -178,13 +179,6 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)      \
                                < (long long) (sd)->cache_hot_time)
 
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-       __put_task_struct(container_of(rhp, struct task_struct, rcu));
-}
-
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
 /*
  * These are the runqueue data structures:
  */
@@ -215,7 +209,6 @@ struct runqueue {
         */
        unsigned long nr_running;
 #ifdef CONFIG_SMP
-       unsigned long prio_bias;
        unsigned long cpu_load[3];
 #endif
        unsigned long long nr_switches;
@@ -245,6 +238,7 @@ struct runqueue {
 
        task_t *migration_thread;
        struct list_head migration_queue;
+       int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -669,68 +663,13 @@ static int effective_prio(task_t *p)
        return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-       rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-       rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-       rq->nr_running++;
-       if (rt_task(p)) {
-               if (p != rq->migration_thread)
-                       /*
-                        * The migration thread does the actual balancing. Do
-                        * not bias by its priority as the ultra high priority
-                        * will skew balancing adversely.
-                        */
-                       inc_prio_bias(rq, p->prio);
-       } else
-               inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-       rq->nr_running--;
-       if (rt_task(p)) {
-               if (p != rq->migration_thread)
-                       dec_prio_bias(rq, p->prio);
-       } else
-               dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-       rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-       rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
        enqueue_task(p, rq->active);
-       inc_nr_running(p, rq);
+       rq->nr_running++;
 }
 
 /*
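
A note on the hunk above: this backs out the prio_bias ("smpnice")
accounting. Each runqueue summed MAX_PRIO - prio over its tasks, and
the load estimators further down scaled their result by
prio_bias / nr_running so that nice levels influenced balancing; with
that gone, the inc/dec helpers reduce to bare counter updates and are
inlined at the call sites. A standalone sketch of what the removed
scaling computed (MAX_PRIO of 140, SCHED_LOAD_SCALE of 128, and two
nice-0 tasks are assumptions for illustration):

#include <stdio.h>

int main(void)
{
        unsigned long scale = 128;                 /* SCHED_LOAD_SCALE, assumed */
        unsigned long running = 2;                 /* rq->nr_running */
        unsigned long prio_bias = 2 * (140 - 120); /* two static_prio-120 tasks */
        unsigned long load_now = running * scale;

        printf("biased:   %lu\n", load_now * prio_bias / running); /* 5120 */
        printf("unbiased: %lu\n", load_now);                       /* 256  */
        return 0;
}
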
@@ -739,7 +678,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
        enqueue_task_head(p, rq->active);
-       inc_nr_running(p, rq);
+       rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -769,12 +708,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
                                p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
                                                DEF_TIMESLICE);
                } else {
-                       /*
-                        * The lower the sleep avg a task has the more
-                        * rapidly it will rise with sleep time.
-                        */
-                       sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
-
                        /*
                         * Tasks waking from uninterruptible sleep are
                         * limited in their sleep_avg rise as they
@@ -863,7 +796,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-       dec_nr_running(p, rq);
+       rq->nr_running--;
        dequeue_task(p, p->array);
        p->array = NULL;
 }
@@ -1007,61 +940,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
        runqueue_t *rq = cpu_rq(cpu);
-       unsigned long running = rq->nr_running;
-       unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-               load_now = running * SCHED_LOAD_SCALE;
-
+       unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
        if (type == 0)
-               source_load = load_now;
-       else
-               source_load = min(cpu_load, load_now);
+               return load_now;
 
-       if (running > 1 || (idle == NOT_IDLE && running))
-               /*
-                * If we are busy rebalancing the load is biased by
-                * priority to create 'nice' support across cpus. When
-                * idle rebalancing we should only bias the source_load if
-                * there is more than one task running on that queue to
-                * prevent idle rebalance from trying to pull tasks from a
-                * queue with only one running task.
-                */
-               source_load = source_load * rq->prio_bias / running;
-
-       return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-       return __source_load(cpu, type, NOT_IDLE);
+       return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
        runqueue_t *rq = cpu_rq(cpu);
-       unsigned long running = rq->nr_running;
-       unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-               load_now = running * SCHED_LOAD_SCALE;
-
+       unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
        if (type == 0)
-               target_load = load_now;
-       else
-               target_load = max(cpu_load, load_now);
-
-       if (running > 1 || (idle == NOT_IDLE && running))
-               target_load = target_load * rq->prio_bias / running;
+               return load_now;
 
-       return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-       return __target_load(cpu, type, NOT_IDLE);
+       return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
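
The rewritten source_load()/target_load() above fold away the old
__source_load()/__target_load() wrappers (the idle_type parameter is
gone) and keep only the conservative part of the logic: a migration
source is under-estimated with min(cpu_load, load_now), a target
over-estimated with max(), so the balancer errs toward not moving
tasks. A small standalone sketch (SCHED_LOAD_SCALE of 128 assumed):

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
        /* decayed average says 2 tasks' worth of load, instantaneous says 3 */
        unsigned long cpu_load = 2 * SCHED_LOAD_SCALE;
        unsigned long load_now = 3 * SCHED_LOAD_SCALE;

        printf("source_load: %lu\n", min_ul(cpu_load, load_now)); /* 256 */
        printf("target_load: %lu\n", max_ul(cpu_load, load_now)); /* 384 */
        return 0;
}
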
@@ -1294,9 +1193,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
                }
        }
 
-       if (p->last_waker_cpu != this_cpu)
-               goto out_set_cpu;
-
        if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
                goto out_set_cpu;
 
@@ -1367,8 +1263,6 @@ out_set_cpu:
                cpu = task_cpu(p);
        }
 
-       p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
        if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1344,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
        memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-       p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
        p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1421,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
                                list_add_tail(&p->run_list, &current->run_list);
                                p->array = current->array;
                                p->array->nr_active++;
-                               inc_nr_running(p, rq);
+                               rq->nr_running++;
                        }
                        set_need_resched();
                } else
@@ -1656,8 +1547,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
        finish_lock_switch(rq, prev);
        if (mm)
                mmdrop(mm);
-       if (unlikely(prev_task_flags & PF_DEAD))
+       if (unlikely(prev_task_flags & PF_DEAD)) {
+               /*
+                * Remove function-return probe instances associated with this
+                * task and put them back on the free list.
+                */
+               kprobe_flush_task(prev);
                put_task_struct(prev);
+       }
 }
 
 /**
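
The kprobe_flush_task() call above is why <linux/kprobes.h> is now
included at the top of the file: a task that fired function-return
probes owns per-task kretprobe instance records, and those must be
recycled when the dead task makes its final context switch. A loose
userspace sketch of that recycle-on-exit pattern (names and types are
illustrative, not the kernel implementation):

#include <stddef.h>

struct rp_inst {
        struct rp_inst *next;
        const void *task;               /* owning task */
};

static struct rp_inst *free_list;

/* Unlink every instance owned by 'task' and push it on the free list. */
static void flush_task_instances(struct rp_inst **table, const void *task)
{
        struct rp_inst **pp = table;

        while (*pp) {
                if ((*pp)->task == task) {
                        struct rp_inst *ri = *pp;

                        *pp = ri->next;         /* unlink from live list */
                        ri->next = free_list;   /* recycle */
                        free_list = ri;
                } else {
                        pp = &(*pp)->next;
                }
        }
}

int main(void)
{
        int t1, t2;
        struct rp_inst a = { NULL, &t1 }, b = { &a, &t2 };
        struct rp_inst *table = &b;             /* live list: b -> a */

        flush_task_instances(&table, &t1);      /* a moves to free_list */
        return free_list == &a ? 0 : 1;
}
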
@@ -1765,6 +1662,9 @@ unsigned long nr_iowait(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1776,7 +1676,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
                spin_lock(&rq1->lock);
                __acquire(rq2->lock);   /* Fake it out ;) */
        } else {
-               if (rq1 < rq2) {
+               if (rq1->cpu < rq2->cpu) {
                        spin_lock(&rq1->lock);
                        spin_lock(&rq2->lock);
                } else {
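
The rq1->cpu < rq2->cpu test above (and the matching change to
double_lock_balance() in the next hunk) is what the new 'cpu' field in
struct runqueue exists for, set up in sched_init() further down:
runqueue locks are now nested by cpu number rather than by runqueue
address, so every path, including the cpu-ordered dependent-sleeper
loops, acquires them in the same order and ABBA deadlock is ruled
out. A minimal pthread sketch of the idiom (illustrative types only):

#include <pthread.h>

struct rq { int cpu; pthread_mutex_t lock; };

/* Lock two runqueues without deadlock: lower cpu id first, always. */
static void double_rq_lock(struct rq *a, struct rq *b)
{
        if (a == b) {
                pthread_mutex_lock(&a->lock);
                return;
        }
        if (a->cpu > b->cpu) {
                struct rq *t = a; a = b; b = t;   /* order by cpu id */
        }
        pthread_mutex_lock(&a->lock);
        pthread_mutex_lock(&b->lock);
}

int main(void)
{
        struct rq r0 = { 0, PTHREAD_MUTEX_INITIALIZER };
        struct rq r1 = { 1, PTHREAD_MUTEX_INITIALIZER };

        double_rq_lock(&r1, &r0);   /* locks r0 then r1 regardless of args */
        pthread_mutex_unlock(&r0.lock);
        pthread_mutex_unlock(&r1.lock);
        return 0;
}
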
@@ -1812,7 +1712,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
        __acquires(this_rq->lock)
 {
        if (unlikely(!spin_trylock(&busiest->lock))) {
-               if (busiest < this_rq) {
+               if (busiest->cpu < this_rq->cpu) {
                        spin_unlock(&this_rq->lock);
                        spin_lock(&busiest->lock);
                        spin_lock(&this_rq->lock);
@@ -1875,9 +1775,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
               runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
        dequeue_task(p, src_array);
-       dec_nr_running(p, src_rq);
+       src_rq->nr_running--;
        set_task_cpu(p, this_cpu);
-       inc_nr_running(p, this_rq);
+       this_rq->nr_running++;
        enqueue_task(p, this_array);
        p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
                                + this_rq->timestamp_last_tick;
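
The trailing context lines of pull_task() above are worth a note:
sched_clock() timestamps are per-CPU, so a timestamp taken on the
source CPU is rebased onto the destination CPU's clock by preserving
its offset from each runqueue's timestamp_last_tick. Worked through
with assumed numbers:

#include <stdio.h>

int main(void)
{
        /* per-cpu clocks disagree; values are assumptions for illustration */
        unsigned long long src_last_tick = 1000, dst_last_tick = 4000;
        unsigned long long ts = 970;    /* stamped 30ns before the src tick */

        ts = (ts - src_last_tick) + dst_last_tick;   /* as in pull_task() */
        printf("%llu\n", ts);                        /* 3970: offset kept */
        return 0;
}
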
@@ -2056,9 +1956,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
                        /* Bias balancing toward cpus of our domain */
                        if (local_group)
-                               load = __target_load(i, load_idx, idle);
+                               load = target_load(i, load_idx);
                        else
-                               load = __source_load(i, load_idx, idle);
+                               load = source_load(i, load_idx);
 
                        avg_load += load;
                }
@@ -2171,7 +2071,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
        int i;
 
        for_each_cpu_mask(i, group->cpumask) {
-               load = __source_load(i, 0, idle);
+               load = source_load(i, 0);
 
                if (load > max_load) {
                        max_load = load;
@@ -2980,7 +2880,7 @@ asmlinkage void __sched schedule(void)
         */
        if (likely(!current->exit_state)) {
                if (unlikely(in_atomic())) {
-                       printk(KERN_ERR "scheduling while atomic: "
+                       printk(KERN_ERR "BUG: scheduling while atomic: "
                                "%s/0x%08x/%d\n",
                                current->comm, preempt_count(), current->pid);
                        dump_stack();
@@ -3571,10 +3471,8 @@ void set_user_nice(task_t *p, long nice)
                goto out_unlock;
        }
        array = p->array;
-       if (array) {
+       if (array)
                dequeue_task(p, array);
-               dec_prio_bias(rq, p->static_prio);
-       }
 
        old_prio = p->prio;
        new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3482,6 @@ void set_user_nice(task_t *p, long nice)
 
        if (array) {
                enqueue_task(p, array);
-               inc_prio_bias(rq, p->static_prio);
                /*
                 * If the task increased its priority or is running and
                 * lowered its priority, then reschedule its CPU:
@@ -4129,6 +4026,8 @@ static inline void __cond_resched(void)
         */
        if (unlikely(preempt_count()))
                return;
+       if (unlikely(system_state != SYSTEM_RUNNING))
+               return;
        do {
                add_preempt_count(PREEMPT_ACTIVE);
                schedule();
@@ -4434,6 +4333,7 @@ void __devinit init_idle(task_t *idle, int cpu)
        runqueue_t *rq = cpu_rq(cpu);
        unsigned long flags;
 
+       idle->timestamp = sched_clock();
        idle->sleep_avg = 0;
        idle->array = NULL;
        idle->prio = MAX_PRIO;
@@ -5159,7 +5059,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-               { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+               { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+                       CONFIG_DEFAULT_MIGRATION_COST
+#else
+                       -1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
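
The '[ 0 ... MAX_DOMAIN_DISTANCE-1 ] = value' form used above is GCC's
designated range initializer: every element of the array gets the same
default at compile time, either the Kconfig-supplied cost or -1LL,
which the calibration code treats as "measure at boot". A standalone
example of the syntax:

#include <stdio.h>

#define MAX_DIST 4

static long long migration_cost[MAX_DIST] = { [0 ... MAX_DIST-1] = -1LL };

int main(void)
{
        printf("%lld %lld\n", migration_cost[0], migration_cost[MAX_DIST-1]);
        return 0;                       /* prints: -1 -1 */
}
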
@@ -6129,6 +6040,7 @@ void __init sched_init(void)
                rq->push_cpu = 0;
                rq->migration_thread = NULL;
                INIT_LIST_HEAD(&rq->migration_queue);
+               rq->cpu = i;
 #endif
                atomic_set(&rq->nr_iowait, 0);
 
@@ -6169,7 +6081,7 @@ void __might_sleep(char *file, int line)
                if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                        return;
                prev_jiffy = jiffies;
-               printk(KERN_ERR "Debug: sleeping function called from invalid"
+               printk(KERN_ERR "BUG: sleeping function called from invalid"
                                " context at %s:%d\n", file, line);
                printk("in_atomic():%d, irqs_disabled():%d\n",
                        in_atomic(), irqs_disabled());