[ARM] Fix collie for -rc1
[powerpc.git] kernel/sched.c
index 1e5cafd..b650667 100644
@@ -206,6 +206,7 @@ struct runqueue {
         */
        unsigned long nr_running;
 #ifdef CONFIG_SMP
+       unsigned long prio_bias;
        unsigned long cpu_load[3];
 #endif
        unsigned long long nr_switches;
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p)
        return prio;
 }
 
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+       rq->prio_bias += MAX_PRIO - prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+       rq->prio_bias -= MAX_PRIO - prio;
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+       rq->nr_running++;
+       if (rt_task(p)) {
+               if (p != rq->migration_thread)
+                       /*
+                        * The migration thread does the actual balancing. Do
+                        * not bias by its priority as the ultra high priority
+                        * will skew balancing adversely.
+                        */
+                       inc_prio_bias(rq, p->prio);
+       } else
+               inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+       rq->nr_running--;
+       if (rt_task(p)) {
+               if (p != rq->migration_thread)
+                       dec_prio_bias(rq, p->prio);
+       } else
+               dec_prio_bias(rq, p->static_prio);
+}
+#else
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+       rq->nr_running++;
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+       rq->nr_running--;
+}
+#endif
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
        enqueue_task(p, rq->active);
-       rq->nr_running++;
+       inc_nr_running(p, rq);
 }
 
 /*
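Note: the new prio_bias field accumulates, per runqueue, the weight MAX_PRIO - prio of every runnable task, so lower-nice (higher-priority) tasks count for more when queues are compared; the migration thread is deliberately excluded so its ultra-high RT priority cannot skew the sums. A minimal sketch of the per-task weight, assuming MAX_PRIO == 140 and NICE_TO_PRIO(nice) == 120 + nice as in mainline headers of this era (the sketch_* names are illustrative, not part of the patch):

	/* Weight one task contributes to rq->prio_bias via inc_prio_bias(). */
	#define SKETCH_MAX_PRIO	140			/* assumed MAX_PRIO */

	static unsigned long sketch_task_bias(int nice)
	{
		int static_prio = 120 + nice;		/* assumed NICE_TO_PRIO(nice) */

		return SKETCH_MAX_PRIO - static_prio;	/* delta applied by inc_prio_bias() */
	}

	/*
	 * sketch_task_bias(-20) == 40
	 * sketch_task_bias(0)   == 20
	 * sketch_task_bias(19)  ==  1
	 */
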
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
        enqueue_task_head(p, rq->active);
-       rq->nr_running++;
+       inc_nr_running(p, rq);
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
        }
 #endif
 
-       p->prio = recalc_task_prio(p, now);
+       if (!rt_task(p))
+               p->prio = recalc_task_prio(p, now);
 
        /*
         * This checks to make sure it's not an uninterruptible task
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-       rq->nr_running--;
+       dec_nr_running(p, rq);
        dequeue_task(p, p->array);
        p->array = NULL;
 }
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 #ifdef CONFIG_SMP
 static void resched_task(task_t *p)
 {
-       int need_resched, nrpolling;
+       int cpu;
 
        assert_spin_locked(&task_rq(p)->lock);
 
-       /* minimise the chance of sending an interrupt to poll_idle() */
-       nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
-       need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
-       nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+               return;
+
+       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+       cpu = task_cpu(p);
+       if (cpu == smp_processor_id())
+               return;
 
-       if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-               smp_send_reschedule(task_cpu(p));
+       /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+       smp_mb();
+       if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+               smp_send_reschedule(cpu);
 }
 #else
 static inline void resched_task(task_t *p)
 {
+       assert_spin_locked(&task_rq(p)->lock);
        set_tsk_need_resched(p);
 }
 #endif
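Note: the rewritten resched_task() sets TIF_NEED_RESCHED first, returns early if the flag was already set or the task runs on the local CPU, and only sends an IPI when the target is not advertising TIF_POLLING_NRFLAG. The smp_mb() keeps the NEED_RESCHED store ordered before the POLLING_NRFLAG read; it pairs with a polling idle loop on the remote CPU, which is not part of this hunk. A simplified, illustrative sketch of that counterpart loop, assuming the generic thread-flag helpers from <linux/sched.h> (this is not the cpu_idle() code of any particular arch):

	/* Illustrative polling idle loop that the smp_mb() above pairs with. */
	static void sketch_poll_idle(void)
	{
		/* Advertise "no IPI needed, I am watching NEED_RESCHED". */
		set_thread_flag(TIF_POLLING_NRFLAG);

		while (!need_resched())
			cpu_relax();	/* spin until resched_task() sets the flag */

		/*
		 * resched_task() skips the IPI only while this flag is visible;
		 * its smp_mb() keeps the NEED_RESCHED store ahead of that check,
		 * so a CPU leaving the polling loop does not miss a wakeup.
		 */
		clear_thread_flag(TIF_POLLING_NRFLAG);
	}
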
@@ -930,27 +994,61 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
        runqueue_t *rq = cpu_rq(cpu);
-       unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+       unsigned long running = rq->nr_running;
+       unsigned long source_load, cpu_load = rq->cpu_load[type-1],
+               load_now = running * SCHED_LOAD_SCALE;
+
        if (type == 0)
-               return load_now;
+               source_load = load_now;
+       else
+               source_load = min(cpu_load, load_now);
+
+       if (running > 1 || (idle == NOT_IDLE && running))
+               /*
+                * If we are busy rebalancing the load is biased by
+                * priority to create 'nice' support across cpus. When
+                * idle rebalancing we should only bias the source_load if
+                * there is more than one task running on that queue to
+                * prevent idle rebalance from trying to pull tasks from a
+                * queue with only one running task.
+                */
+               source_load = source_load * rq->prio_bias / running;
+
+       return source_load;
+}
 
-       return min(rq->cpu_load[type-1], load_now);
+static inline unsigned long source_load(int cpu, int type)
+{
+       return __source_load(cpu, type, NOT_IDLE);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
 {
        runqueue_t *rq = cpu_rq(cpu);
-       unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+       unsigned long running = rq->nr_running;
+       unsigned long target_load, cpu_load = rq->cpu_load[type-1],
+               load_now = running * SCHED_LOAD_SCALE;
+
        if (type == 0)
-               return load_now;
+               target_load = load_now;
+       else
+               target_load = max(cpu_load, load_now);
+
+       if (running > 1 || (idle == NOT_IDLE && running))
+               target_load = target_load * rq->prio_bias / running;
+
+       return target_load;
+}
 
-       return max(rq->cpu_load[type-1], load_now);
+static inline unsigned long target_load(int cpu, int type)
+{
+       return __target_load(cpu, type, NOT_IDLE);
 }
 
 /*
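Note: __source_load()/__target_load() scale the raw load by prio_bias / nr_running, so a queue of low-nice (high-priority) tasks looks proportionally busier than one of nice tasks, and single-task queues are only biased when not idle-balancing. A worked example of the type == 0 path, assuming SCHED_LOAD_SCALE == 128 as in kernels of this era (the sketch_* name is illustrative):

	/* Bias arithmetic of __source_load()/__target_load(), type == 0 path. */
	static unsigned long sketch_biased_load(unsigned long running,
						unsigned long prio_bias)
	{
		unsigned long load = running * 128;	/* assumed SCHED_LOAD_SCALE */

		if (running > 1)			/* idle-balance case above */
			load = load * prio_bias / running;

		return load;
	}

	/*
	 * sketch_biased_load(2, 40) == 5120	two nice-0 tasks  (bias 20 each)
	 * sketch_biased_load(2,  2) ==  256	two nice-19 tasks (bias  1 each)
	 *
	 * The nice-0 queue looks 20x busier, giving the 'nice' support across
	 * cpus described in the comment in __source_load().
	 */
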
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
                                list_add_tail(&p->run_list, &current->run_list);
                                p->array = current->array;
                                p->array->nr_active++;
-                               rq->nr_running++;
+                               inc_nr_running(p, rq);
                        }
                        set_need_resched();
                } else
@@ -1468,7 +1566,7 @@ void fastcall sched_exit(task_t *p)
         * the sleep_avg of the parent as well.
         */
        rq = task_rq_lock(p->parent, &flags);
-       if (p->first_time_slice) {
+       if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
                p->parent->time_slice += p->time_slice;
                if (unlikely(p->parent->time_slice > task_timeslice(p)))
                        p->parent->time_slice = task_timeslice(p);
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
               runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
        dequeue_task(p, src_array);
-       src_rq->nr_running--;
+       dec_nr_running(p, src_rq);
        set_task_cpu(p, this_cpu);
-       this_rq->nr_running++;
+       inc_nr_running(p, this_rq);
        enqueue_task(p, this_array);
        p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
                                + this_rq->timestamp_last_tick;
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
                        /* Bias balancing toward cpus of our domain */
                        if (local_group)
-                               load = target_load(i, load_idx);
+                               load = __target_load(i, load_idx, idle);
                        else
-                               load = source_load(i, load_idx);
+                               load = __source_load(i, load_idx, idle);
 
                        avg_load += load;
                }
@@ -2044,14 +2142,15 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+       enum idle_type idle)
 {
        unsigned long load, max_load = 0;
        runqueue_t *busiest = NULL;
        int i;
 
        for_each_cpu_mask(i, group->cpumask) {
-               load = source_load(i, 0);
+               load = __source_load(i, 0, idle);
 
                if (load > max_load) {
                        max_load = load;
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
                goto out_balanced;
        }
 
-       busiest = find_busiest_queue(group);
+       busiest = find_busiest_queue(group, idle);
        if (!busiest) {
                schedstat_inc(sd, lb_nobusyq[idle]);
                goto out_balanced;
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
                goto out_balanced;
        }
 
-       busiest = find_busiest_queue(group);
+       busiest = find_busiest_queue(group, NEWLY_IDLE);
        if (!busiest) {
                schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
                goto out_balanced;
@@ -2511,8 +2610,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
                cpustat->idle = cputime64_add(cpustat->idle, tmp);
        /* Account for system time used */
        acct_update_integrals(p);
-       /* Update rss highwater mark */
-       update_mem_hiwater(p);
 }
 
 /*
@@ -3453,8 +3550,10 @@ void set_user_nice(task_t *p, long nice)
                goto out_unlock;
        }
        array = p->array;
-       if (array)
+       if (array) {
                dequeue_task(p, array);
+               dec_prio_bias(rq, p->static_prio);
+       }
 
        old_prio = p->prio;
        new_prio = NICE_TO_PRIO(nice);
@@ -3464,6 +3563,7 @@ void set_user_nice(task_t *p, long nice)
 
        if (array) {
                enqueue_task(p, array);
+               inc_prio_bias(rq, p->static_prio);
                /*
                 * If the task increased its priority or is running and
                 * lowered its priority, then reschedule its CPU:
@@ -3565,8 +3665,6 @@ int idle_cpu(int cpu)
        return cpu_curr(cpu) == cpu_rq(cpu)->idle;
 }
 
-EXPORT_SYMBOL_GPL(idle_cpu);
-
 /**
  * idle_task - return the idle task for a given cpu.
  * @cpu: the processor in question.
@@ -3879,7 +3977,6 @@ EXPORT_SYMBOL(cpu_present_map);
 
 #ifndef CONFIG_SMP
 cpumask_t cpu_online_map = CPU_MASK_ALL;
-EXPORT_SYMBOL_GPL(cpu_online_map);
 cpumask_t cpu_possible_map = CPU_MASK_ALL;
 #endif
 
@@ -4683,7 +4780,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                /* Unbind it from offline cpu so it can run.  Fall thru. */
-               kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id());
+               kthread_bind(cpu_rq(cpu)->migration_thread,
+                            any_online_cpu(cpu_online_map));
                kthread_stop(cpu_rq(cpu)->migration_thread);
                cpu_rq(cpu)->migration_thread = NULL;
                break;