sched: track highest prio task queued
kernel/sched_rt.c (powerpc.git)
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

/*
 * Update the current task's runtime statistics. Skip if the current
 * task is not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        u64 delta_exec;

        if (!task_has_rt_policy(curr))
                return;

        delta_exec = rq->clock - curr->se.exec_start;
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0; /* guard against rq->clock having gone backwards */

        schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        curr->se.exec_start = rq->clock;
        cpuacct_charge(curr, delta_exec);
}

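/*
 * Keep the rq's count of runnable RT tasks and, on SMP, the priority
 * of the highest-priority task queued (a lower prio value means a
 * higher priority), so that other CPUs can inspect a runqueue without
 * scanning its priority array.
 */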
static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
{
        WARN_ON(!rt_task(p));
        rq->rt.rt_nr_running++;
#ifdef CONFIG_SMP
        if (p->prio < rq->rt.highest_prio)
                rq->rt.highest_prio = p->prio;
#endif /* CONFIG_SMP */
}

static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
{
        WARN_ON(!rt_task(p));
        WARN_ON(!rq->rt.rt_nr_running);
        rq->rt.rt_nr_running--;
#ifdef CONFIG_SMP
        if (rq->rt.rt_nr_running) {
                struct rt_prio_array *array;

                WARN_ON(p->prio < rq->rt.highest_prio);
                if (p->prio == rq->rt.highest_prio) {
                        /* recalculate the new highest priority */
                        array = &rq->rt.active;
                        rq->rt.highest_prio =
                                sched_find_first_bit(array->bitmap);
                } /* otherwise leave rq->rt.highest_prio alone */
        } else
                rq->rt.highest_prio = MAX_RT_PRIO;
#endif /* CONFIG_SMP */
}

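/*
 * Illustrative sketch (not part of this file): with highest_prio kept
 * up to date, an SMP path could test whether another runqueue holds a
 * task that would preempt p, without touching that rq's priority array:
 *
 *      if (other_rq->rt.highest_prio < p->prio)
 *              ; /* other_rq queues a higher-priority RT task */
 */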
/*
 * Adding/removing a task to/from a priority array:
 */
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
        struct rt_prio_array *array = &rq->rt.active;

        list_add_tail(&p->run_list, array->queue + p->prio);
        __set_bit(p->prio, array->bitmap);
        inc_cpu_load(rq, p->se.load.weight);

        inc_rt_tasks(p, rq);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
        struct rt_prio_array *array = &rq->rt.active;

        update_curr_rt(rq);

        list_del(&p->run_list);
        if (list_empty(array->queue + p->prio))
                __clear_bit(p->prio, array->bitmap);
        dec_cpu_load(rq, p->se.load.weight);

        dec_rt_tasks(p, rq);
}

/*
 * Put task to the end of the run list without the overhead of dequeue
 * followed by enqueue.
 */
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
{
        struct rt_prio_array *array = &rq->rt.active;

        list_move_tail(&p->run_list, array->queue + p->prio);
}

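/*
 * Yielding never changes the task's priority: the current task is
 * simply dropped to the tail of its own priority queue.
 */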
static void yield_task_rt(struct rq *rq)
{
        requeue_task_rt(rq, rq->curr);
}

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
{
        if (p->prio < rq->curr->prio)
                resched_task(rq->curr);
}

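/*
 * Pick the highest-priority runnable RT task: the first set bit in the
 * bitmap indexes the first non-empty queue, and tasks within a queue
 * run in FIFO order.
 */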
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
        struct rt_prio_array *array = &rq->rt.active;
        struct task_struct *next;
        struct list_head *queue;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        if (idx >= MAX_RT_PRIO)
                return NULL;

        queue = array->queue + idx;
        next = list_entry(queue->next, struct task_struct, run_list);

        next->se.exec_start = rq->clock;

        return next;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);
        p->se.exec_start = 0;
}

#ifdef CONFIG_SMP
/*
 * Load-balancing iterator. Note: while the runqueue stays locked
 * during the whole iteration, the current task might be
 * dequeued so the iterator has to be dequeue-safe. Here we
 * achieve that by always pre-iterating before returning
 * the current task:
 */
static struct task_struct *load_balance_start_rt(void *arg)
{
        struct rq *rq = arg;
        struct rt_prio_array *array = &rq->rt.active;
        struct list_head *head, *curr;
        struct task_struct *p;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        if (idx >= MAX_RT_PRIO)
                return NULL;

        head = array->queue + idx;
        curr = head->prev;

        p = list_entry(curr, struct task_struct, run_list);

        curr = curr->prev;

        rq->rt.rt_load_balance_idx = idx;
        rq->rt.rt_load_balance_head = head;
        rq->rt.rt_load_balance_curr = curr;

        return p;
}

static struct task_struct *load_balance_next_rt(void *arg)
{
        struct rq *rq = arg;
        struct rt_prio_array *array = &rq->rt.active;
        struct list_head *head, *curr;
        struct task_struct *p;
        int idx;

        idx = rq->rt.rt_load_balance_idx;
        head = rq->rt.rt_load_balance_head;
        curr = rq->rt.rt_load_balance_curr;

        /*
         * If we have arrived back at the head, advance to the
         * next queue (if any):
         */
        if (unlikely(head == curr)) {
                int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);

                if (next_idx >= MAX_RT_PRIO)
                        return NULL;

                idx = next_idx;
                head = array->queue + idx;
                curr = head->prev;

                rq->rt.rt_load_balance_idx = idx;
                rq->rt.rt_load_balance_head = head;
        }

        p = list_entry(curr, struct task_struct, run_list);

        curr = curr->prev;

        rq->rt.rt_load_balance_curr = curr;

        return p;
}

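/*
 * Glue between the RT runqueue and the generic balancer: balance_tasks()
 * pulls tasks off the busiest rq by walking it with the start/next
 * iterator callbacks above.
 */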
static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                unsigned long max_load_move,
                struct sched_domain *sd, enum cpu_idle_type idle,
                int *all_pinned, int *this_best_prio)
{
        struct rq_iterator rt_rq_iterator;

        rt_rq_iterator.start = load_balance_start_rt;
        rt_rq_iterator.next = load_balance_next_rt;
        /*
         * Pass 'busiest' as the rq argument to the
         * load_balance_[start|next]_rt iterators:
         */
        rt_rq_iterator.arg = busiest;

        return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
                             idle, all_pinned, this_best_prio, &rt_rq_iterator);
}

static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
                 struct sched_domain *sd, enum cpu_idle_type idle)
{
        struct rq_iterator rt_rq_iterator;

        rt_rq_iterator.start = load_balance_start_rt;
        rt_rq_iterator.next = load_balance_next_rt;
        rt_rq_iterator.arg = busiest;

        return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
                                  &rt_rq_iterator);
}
#endif /* CONFIG_SMP */

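/*
 * Scheduler-tick handling: SCHED_RR tasks burn a timeslice and get
 * rotated to the tail of their queue when it runs out; SCHED_FIFO
 * tasks run until they block, yield or are preempted.
 */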
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);

        /*
         * RR tasks need a special form of timeslice management.
         * FIFO tasks have no timeslices.
         */
        if (p->policy != SCHED_RR)
                return;

        if (--p->time_slice)
                return;

        p->time_slice = DEF_TIMESLICE;

        /*
         * Requeue to the end of queue if we are not the only element
         * on the queue:
         */
        if (p->run_list.prev != p->run_list.next) {
                requeue_task_rt(rq, p);
                set_tsk_need_resched(p);
        }
}

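/*
 * rq->curr just became a task of this class (e.g. after a change of
 * scheduling class or priority): stamp exec_start so update_curr_rt()
 * accounts its runtime from this point on.
 */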
static void set_curr_task_rt(struct rq *rq)
{
        struct task_struct *p = rq->curr;

        p->se.exec_start = rq->clock;
}

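/*
 * The RT class is consulted before the fair class: the core scheduler
 * walks sched_class objects via ->next, and rt_sched_class chains to
 * fair_sched_class below.
 */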
const struct sched_class rt_sched_class = {
        .next                   = &fair_sched_class,
        .enqueue_task           = enqueue_task_rt,
        .dequeue_task           = dequeue_task_rt,
        .yield_task             = yield_task_rt,

        .check_preempt_curr     = check_preempt_curr_rt,

        .pick_next_task         = pick_next_task_rt,
        .put_prev_task          = put_prev_task_rt,

#ifdef CONFIG_SMP
        .load_balance           = load_balance_rt,
        .move_one_task          = move_one_task_rt,
#endif

        .set_curr_task          = set_curr_task_rt,
        .task_tick              = task_tick_rt,
};