/*
 * linux/net/sunrpc/sched.c
 *
 * Scheduling for synchronous and asynchronous RPC requests.
 *
 * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
 *
 * TCP NFS related read + write fixes
 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
 */

#include <linux/module.h>

#define __KERNEL_SYSCALLS__
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>

#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>

#ifdef RPC_DEBUG
#define RPCDBG_FACILITY         RPCDBG_SCHED
static int                      rpc_task_id;
#endif

/*
 * We give RPC the same get_free_pages priority as NFS
 */
#define GFP_RPC                 GFP_NOFS

static void                     __rpc_default_timer(struct rpc_task *task);
static void                     rpciod_killall(void);

/*
 * When an asynchronous RPC task is activated within a bottom half
 * handler, or while executing another RPC task, it is put on
 * schedq, and rpciod is woken up.
 */
static RPC_WAITQ(schedq, "schedq");

/*
 * RPC tasks that create another task (e.g. for contacting the portmapper)
 * will wait on this queue for their child's completion
 */
static RPC_WAITQ(childq, "childq");

/*
 * RPC tasks sit here while waiting for conditions to improve.
 */
static RPC_WAITQ(delay_queue, "delayq");

/*
 * All RPC tasks are linked into this list
 */
static LIST_HEAD(all_tasks);

/*
 * rpciod-related stuff
 */
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_WAIT_QUEUE_HEAD(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema);
static unsigned int             rpciod_users;
static pid_t                    rpciod_pid;
static int                      rpc_inhibit;

/*
 * Spinlock for wait queues. Access to the latter also has to be
 * interrupt-safe in order to allow timers to wake up sleeping tasks.
 */
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/*
 * Spinlock for other critical sections of code.
 */
static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;

/*
 * This is the last-ditch buffer for NFS swap requests
 */
static u32                      swap_buffer[PAGE_SIZE >> 2];
static long                     swap_buffer_used;

/*
 * Make allocation of the swap_buffer SMP-safe
 */
static __inline__ int rpc_lock_swapbuf(void)
{
        return !test_and_set_bit(1, &swap_buffer_used);
}
static __inline__ void rpc_unlock_swapbuf(void)
{
        clear_bit(1, &swap_buffer_used);
}
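
/*
 * Note: the pair above is a simple try-lock built on an atomic bit in
 * swap_buffer_used. rpc_allocate() below hands out swap_buffer only when
 * rpc_lock_swapbuf() wins the bit, and rpc_free() releases it by clearing
 * the bit again when that address is passed back.
 */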

/*
 * Disable the timer for a given RPC task. Should be called with
 * rpc_queue_lock and bh_disabled in order to avoid races within
 * rpc_run_timer().
 */
static inline void
__rpc_disable_timer(struct rpc_task *task)
{
        dprintk("RPC: %4d disabling timer\n", task->tk_pid);
        task->tk_timeout_fn = NULL;
        task->tk_timeout = 0;
}

/*
 * Run a timeout function.
 * We use the callback in order to allow __rpc_wake_up_task()
 * and friends to disable the timer synchronously on SMP systems
 * without calling del_timer_sync(). The latter could cause a
 * deadlock if called while we're holding spinlocks...
 */
static void
rpc_run_timer(struct rpc_task *task)
{
        void (*callback)(struct rpc_task *);

        spin_lock_bh(&rpc_queue_lock);
        callback = task->tk_timeout_fn;
        task->tk_timeout_fn = NULL;
        spin_unlock_bh(&rpc_queue_lock);
        if (callback) {
                dprintk("RPC: %4d running timer\n", task->tk_pid);
                callback(task);
        }
}

/*
 * Set up a timer for the current task.
 */
static inline void
__rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
        if (!task->tk_timeout)
                return;

        dprintk("RPC: %4d setting alarm for %lu ms\n",
                        task->tk_pid, task->tk_timeout * 1000 / HZ);

        if (timer)
                task->tk_timeout_fn = timer;
        else
                task->tk_timeout_fn = __rpc_default_timer;
        mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
}

/*
 * Set up a timer for an already sleeping task.
 */
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
        spin_lock_bh(&rpc_queue_lock);
        if (!RPC_IS_RUNNING(task))
                __rpc_add_timer(task, timer);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Delete any timer for the current task. Because we use del_timer_sync(),
 * this function should never be called while holding rpc_queue_lock.
 */
static inline void
rpc_delete_timer(struct rpc_task *task)
{
        dprintk("RPC: %4d deleting timer\n", task->tk_pid);
        del_timer_sync(&task->tk_timer);
}
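
/*
 * Taken together, the helpers above give each rpc_task a per-task timer
 * with this life cycle: __rpc_add_timer() arms tk_timer and records the
 * handler in tk_timeout_fn; when the timer fires, rpc_run_timer()
 * snapshots and clears tk_timeout_fn under rpc_queue_lock before calling
 * it; __rpc_disable_timer() cancels a pending handler cheaply (again
 * under the lock) without touching the kernel timer itself; and
 * rpc_delete_timer() does the synchronous teardown once no spinlocks are
 * held.
 */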

/*
 * Add new request to wait queue.
 *
 * Swapper tasks always get inserted at the head of the queue.
 * This should avoid many nasty memory deadlocks and hopefully
 * improve overall performance.
 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
 */
static inline int
__rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
        if (task->tk_rpcwait == queue)
                return 0;

        if (task->tk_rpcwait) {
                printk(KERN_WARNING "RPC: doubly enqueued task!\n");
                return -EWOULDBLOCK;
        }
        if (RPC_IS_SWAPPER(task))
                list_add(&task->tk_list, &queue->tasks);
        else
                list_add_tail(&task->tk_list, &queue->tasks);
        task->tk_rpcwait = queue;

        dprintk("RPC: %4d added to queue %p \"%s\"\n",
                                task->tk_pid, queue, rpc_qname(queue));

        return 0;
}

int
rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
        int             result;

        spin_lock_bh(&rpc_queue_lock);
        result = __rpc_add_wait_queue(q, task);
        spin_unlock_bh(&rpc_queue_lock);
        return result;
}

/*
 * Remove request from queue.
 * Note: must be called with spin lock held.
 */
static inline void
__rpc_remove_wait_queue(struct rpc_task *task)
{
        struct rpc_wait_queue *queue = task->tk_rpcwait;

        if (!queue)
                return;

        list_del(&task->tk_list);
        task->tk_rpcwait = NULL;

        dprintk("RPC: %4d removed from queue %p \"%s\"\n",
                                task->tk_pid, queue, rpc_qname(queue));
}

void
rpc_remove_wait_queue(struct rpc_task *task)
{
        if (!task->tk_rpcwait)
                return;
        spin_lock_bh(&rpc_queue_lock);
        __rpc_remove_wait_queue(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Make an RPC task runnable.
 *
 * Note: If the task is ASYNC, this must be called with
 * the spinlock held to protect the wait queue operation.
 */
static inline void
rpc_make_runnable(struct rpc_task *task)
{
        if (task->tk_timeout_fn) {
                printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
                return;
        }
        rpc_set_running(task);
        if (RPC_IS_ASYNC(task)) {
                if (RPC_IS_SLEEPING(task)) {
                        int status;
                        status = __rpc_add_wait_queue(&schedq, task);
                        if (status < 0) {
                                printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
                                task->tk_status = status;
                                return;
                        }
                        rpc_clear_sleeping(task);
                        if (waitqueue_active(&rpciod_idle))
                                wake_up(&rpciod_idle);
                }
        } else {
                rpc_clear_sleeping(task);
                if (waitqueue_active(&task->tk_wait))
                        wake_up(&task->tk_wait);
        }
}
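
/*
 * The two branches above are the heart of the scheduler's wake-up model:
 * an async task is made runnable by moving it onto schedq and kicking
 * rpciod (which drains schedq via __rpc_schedule() below), while a sync
 * task is woken by waking the process sleeping in __rpc_execute() on the
 * task's private tk_wait queue.
 */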

/*
 * Place a newly initialized task on the schedq.
 */
static inline void
rpc_schedule_run(struct rpc_task *task)
{
        /* Don't run a child twice! */
        if (RPC_IS_ACTIVATED(task))
                return;
        task->tk_active = 1;
        rpc_set_sleeping(task);
        rpc_make_runnable(task);
}

/*
 *      For other people who may need to wake the I/O daemon
 *      but should (for now) know nothing about its innards
 */
void rpciod_wake_up(void)
{
        if(rpciod_pid==0)
                printk(KERN_ERR "rpciod: wot no daemon?\n");
        if (waitqueue_active(&rpciod_idle))
                wake_up(&rpciod_idle);
}

/*
 * Prepare for sleeping on a wait queue.
 * By always appending tasks to the list we ensure FIFO behavior.
 * NB: An RPC task will only receive interrupt-driven events as long
 * as it's on a wait queue.
 */
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                        rpc_action action, rpc_action timer)
{
        int status;

        dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
                                rpc_qname(q), jiffies);

        if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
                printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
                return;
        }

        /* Mark the task as being activated if so needed */
        if (!RPC_IS_ACTIVATED(task)) {
                task->tk_active = 1;
                rpc_set_sleeping(task);
        }

        status = __rpc_add_wait_queue(q, task);
        if (status) {
                printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
                task->tk_status = status;
        } else {
                rpc_clear_running(task);
                if (task->tk_callback) {
                        dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
                        BUG();
                }
                task->tk_callback = action;
                __rpc_add_timer(task, timer);
        }
}

void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                                rpc_action action, rpc_action timer)
{
        /*
         * Protect the queue operations.
         */
        spin_lock_bh(&rpc_queue_lock);
        __rpc_sleep_on(q, task, action, timer);
        spin_unlock_bh(&rpc_queue_lock);
}
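
/*
 * Typical usage sketch (illustrative only, not part of this file): a
 * state function in the transport or client code puts the task to sleep
 * with a wake-up callback and an optional timeout handler, and an
 * interrupt or bottom-half path later wakes it, roughly:
 *
 *      task->tk_timeout = some_timeout;
 *      rpc_sleep_on(&some_waitq, task, some_callback, some_timer_fn);
 *      ...
 *      rpc_wake_up_task(task);         (e.g. from a data-ready handler)
 *
 * The queue, callback, timer function and timeout above are placeholders;
 * the real call sites live in net/sunrpc/xprt.c and clnt.c.
 */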

/**
 * __rpc_wake_up_task - wake up a single rpc_task
 * @task: task to be woken up
 *
 * Caller must hold rpc_queue_lock
 */
static void
__rpc_wake_up_task(struct rpc_task *task)
{
        dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
                                        task->tk_pid, jiffies, rpc_inhibit);

#ifdef RPC_DEBUG
        if (task->tk_magic != 0xf00baa) {
                printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
                rpc_debug = ~0;
                rpc_show_tasks();
                return;
        }
#endif
        /* Has the task been executed yet? If not, we cannot wake it up! */
        if (!RPC_IS_ACTIVATED(task)) {
                printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
                return;
        }
        if (RPC_IS_RUNNING(task))
                return;

        __rpc_disable_timer(task);
        if (task->tk_rpcwait != &schedq)
                __rpc_remove_wait_queue(task);

        rpc_make_runnable(task);

        dprintk("RPC:      __rpc_wake_up_task done\n");
}

/*
 * Default timeout handler if none specified by user
 */
static void
__rpc_default_timer(struct rpc_task *task)
{
        dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
        task->tk_status = -ETIMEDOUT;
        rpc_wake_up_task(task);
}

/*
 * Wake up the specified task
 */
void
rpc_wake_up_task(struct rpc_task *task)
{
        if (RPC_IS_RUNNING(task))
                return;
        spin_lock_bh(&rpc_queue_lock);
        __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Wake up the next task on the wait queue.
 */
struct rpc_task *
rpc_wake_up_next(struct rpc_wait_queue *queue)
{
        struct rpc_task *task = NULL;

        dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
        spin_lock_bh(&rpc_queue_lock);
        task_for_first(task, &queue->tasks)
                __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);

        return task;
}

/**
 * rpc_wake_up - wake up all rpc_tasks
 * @queue: rpc_wait_queue on which the tasks are sleeping
 *
 * Grabs rpc_queue_lock
 */
void
rpc_wake_up(struct rpc_wait_queue *queue)
{
        struct rpc_task *task;

        spin_lock_bh(&rpc_queue_lock);
        while (!list_empty(&queue->tasks))
                task_for_first(task, &queue->tasks)
                        __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/**
 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
 * @queue: rpc_wait_queue on which the tasks are sleeping
 * @status: status value to set
 *
 * Grabs rpc_queue_lock
 */
void
rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
        struct rpc_task *task;

        spin_lock_bh(&rpc_queue_lock);
        while (!list_empty(&queue->tasks)) {
                task_for_first(task, &queue->tasks) {
                        task->tk_status = status;
                        __rpc_wake_up_task(task);
                }
        }
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Run a task at a later time
 */
static void     __rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
        task->tk_timeout = delay;
        rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}

static void
__rpc_atrun(struct rpc_task *task)
{
        task->tk_status = 0;
        rpc_wake_up_task(task);
}
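
/*
 * Illustrative use of rpc_delay() (not from this file): a state function
 * that wants to back off and retry can do something like
 *
 *      rpc_delay(task, HZ >> 4);
 *      task->tk_action = next_state_fn;
 *
 * i.e. sleep on delay_queue for the given number of jiffies, after which
 * __rpc_atrun() clears tk_status and makes the task runnable again. The
 * delay value and next_state_fn above are placeholders for whatever the
 * caller needs.
 */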

/*
 * This is the RPC `scheduler' (or rather, the finite state machine).
 */
static int
__rpc_execute(struct rpc_task *task)
{
        int             status = 0;

        dprintk("RPC: %4d rpc_execute flgs %x\n",
                                task->tk_pid, task->tk_flags);

        if (!RPC_IS_RUNNING(task)) {
                printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
                return 0;
        }

 restarted:
        while (1) {
                /*
                 * Execute any pending callback.
                 */
                if (RPC_DO_CALLBACK(task)) {
                        /* Define a callback save pointer */
                        void (*save_callback)(struct rpc_task *);

                        /*
                         * If a callback exists, save it, reset it,
                         * call it.
                         * The save is needed to stop from resetting
                         * another callback set within the callback handler
                         * - Dave
                         */
                        save_callback=task->tk_callback;
                        task->tk_callback=NULL;
                        save_callback(task);
                }

                /*
                 * Perform the next FSM step.
                 * tk_action may be NULL when the task has been killed
                 * by someone else.
                 */
                if (RPC_IS_RUNNING(task)) {
                        /*
                         * Garbage collection of pending timers...
                         */
                        rpc_delete_timer(task);
                        if (!task->tk_action)
                                break;
                        task->tk_action(task);
                }

                /*
                 * Check whether task is sleeping.
                 */
                spin_lock_bh(&rpc_queue_lock);
                if (!RPC_IS_RUNNING(task)) {
                        rpc_set_sleeping(task);
                        if (RPC_IS_ASYNC(task)) {
                                spin_unlock_bh(&rpc_queue_lock);
                                return 0;
                        }
                }
                spin_unlock_bh(&rpc_queue_lock);

                while (RPC_IS_SLEEPING(task)) {
                        /* sync task: sleep here */
                        dprintk("RPC: %4d sync task going to sleep\n",
                                                        task->tk_pid);
                        if (current->pid == rpciod_pid)
                                printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");

                        __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
                        dprintk("RPC: %4d sync task resuming\n", task->tk_pid);

                        /*
                         * When a sync task receives a signal, it exits with
                         * -ERESTARTSYS. In order to catch any callbacks that
                         * clean up after sleeping on some queue, we don't
                         * break the loop here, but go around once more.
                         */
                        if (task->tk_client->cl_intr && signalled()) {
                                dprintk("RPC: %4d got signal\n", task->tk_pid);
                                task->tk_flags |= RPC_TASK_KILLED;
                                rpc_exit(task, -ERESTARTSYS);
                                rpc_wake_up_task(task);
                        }
                }
        }

        if (task->tk_exit) {
                task->tk_exit(task);
                /* If tk_action is non-null, the user wants us to restart */
                if (task->tk_action) {
                        if (!RPC_ASSASSINATED(task)) {
                                /* Release RPC slot and buffer memory */
                                if (task->tk_rqstp)
                                        xprt_release(task);
                                if (task->tk_buffer) {
                                        rpc_free(task->tk_buffer);
                                        task->tk_buffer = NULL;
                                }
                                goto restarted;
                        }
                        printk(KERN_ERR "RPC: dead task tries to walk away.\n");
                }
        }

        dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
        status = task->tk_status;

        /* Release all resources associated with the task */
        rpc_release_task(task);

        return status;
}

/*
 * User-visible entry point to the scheduler.
 *
 * This may be called recursively if e.g. an async NFS task updates
 * the attributes and finds that dirty pages must be flushed.
 * NOTE: Upon exit of this function the task is guaranteed to be
 *       released. In particular note that tk_release() will have
 *       been called, so your task memory may have been freed.
 */
int
rpc_execute(struct rpc_task *task)
{
        int status = -EIO;
        if (rpc_inhibit) {
                printk(KERN_INFO "RPC: execution inhibited!\n");
                goto out_release;
        }

        status = -EWOULDBLOCK;
        if (task->tk_active) {
                printk(KERN_ERR "RPC: active task was run twice!\n");
                goto out_err;
        }

        task->tk_active = 1;
        rpc_set_running(task);
        return __rpc_execute(task);
 out_release:
        rpc_release_task(task);
 out_err:
        return status;
}
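
/*
 * Usage sketch (hedged, for orientation only): a synchronous caller
 * typically builds a task and drives it through rpc_execute() roughly
 * like this:
 *
 *      struct rpc_task task;
 *
 *      rpc_init_task(&task, clnt, NULL, flags);
 *      rpc_call_setup(&task, &msg, 0);
 *      status = rpc_execute(&task);
 *
 * rpc_call_setup() lives in clnt.c, not here, and the exact sequence
 * belongs to the caller. The key contract, stated in the comment above,
 * is that the task has always been released by the time rpc_execute()
 * returns.
 */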

/*
 * This is our own little scheduler for async RPC tasks.
 */
static void
__rpc_schedule(void)
{
        struct rpc_task *task;
        int             count = 0;

        dprintk("RPC:      rpc_schedule enter\n");
        while (1) {
                spin_lock_bh(&rpc_queue_lock);

                task_for_first(task, &schedq.tasks) {
                        __rpc_remove_wait_queue(task);
                        spin_unlock_bh(&rpc_queue_lock);

                        __rpc_execute(task);
                } else {
                        spin_unlock_bh(&rpc_queue_lock);
                        break;
                }

                if (++count >= 200 || current->need_resched) {
                        count = 0;
                        schedule();
                }
        }
        dprintk("RPC:      rpc_schedule leave\n");
}

/*
 * Allocate memory for RPC purpose.
 *
 * This is yet another tricky issue: For sync requests issued by
 * a user process, we want to make kmalloc sleep if there isn't
 * enough memory. Async requests should not sleep too excessively
 * because that will block rpciod (but that's not dramatic when
 * it's starved of memory anyway). Finally, swapout requests should
 * never sleep at all, and should not trigger another swap_out
 * request through kmalloc which would just increase memory contention.
 *
 * I hope the following gets it right, which gives async requests
 * a slight advantage over sync requests (good for writeback, debatable
 * for readahead):
 *
 *   sync user requests:        GFP_KERNEL
 *   async requests:            GFP_RPC         (== GFP_NOFS)
 *   swap requests:             GFP_ATOMIC      (or new GFP_SWAPPER)
 */
void *
rpc_allocate(unsigned int flags, unsigned int size)
{
        u32     *buffer;
        int     gfp;

        if (flags & RPC_TASK_SWAPPER)
                gfp = GFP_ATOMIC;
        else if (flags & RPC_TASK_ASYNC)
                gfp = GFP_RPC;
        else
                gfp = GFP_KERNEL;

        do {
                if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
                        dprintk("RPC:      allocated buffer %p\n", buffer);
                        return buffer;
                }
                if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
                    && rpc_lock_swapbuf()) {
                        dprintk("RPC:      used last-ditch swap buffer\n");
                        return swap_buffer;
                }
                if (flags & RPC_TASK_ASYNC)
                        return NULL;
                yield();
        } while (!signalled());

        return NULL;
}

void
rpc_free(void *buffer)
{
        if (buffer != swap_buffer) {
                kfree(buffer);
                return;
        }
        rpc_unlock_swapbuf();
}
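
/*
 * Note: anything obtained from rpc_allocate() must be returned through
 * rpc_free(), never plain kfree(), because the returned pointer may be
 * the static swap_buffer rather than a kmalloc'd block; rpc_free() tells
 * the two apart and releases the swap-buffer bit lock in the latter case.
 */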

/*
 * Creation and deletion of RPC task structures
 */
inline void
rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
                                rpc_action callback, int flags)
{
        memset(task, 0, sizeof(*task));
        init_timer(&task->tk_timer);
        task->tk_timer.data     = (unsigned long) task;
        task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
        task->tk_client = clnt;
        task->tk_flags  = flags;
        task->tk_exit   = callback;
        init_waitqueue_head(&task->tk_wait);
        if (current->uid != current->fsuid || current->gid != current->fsgid)
                task->tk_flags |= RPC_TASK_SETUID;

        /* Initialize retry counters */
        task->tk_garb_retry = 2;
        task->tk_cred_retry = 2;
        task->tk_suid_retry = 1;

        /* Add to global list of all tasks */
        spin_lock(&rpc_sched_lock);
        list_add(&task->tk_task, &all_tasks);
        spin_unlock(&rpc_sched_lock);

        if (clnt)
                atomic_inc(&clnt->cl_users);

#ifdef RPC_DEBUG
        task->tk_magic = 0xf00baa;
        task->tk_pid = rpc_task_id++;
#endif
        dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
                                current->pid);
}

static void
rpc_default_free_task(struct rpc_task *task)
{
        dprintk("RPC: %4d freeing task\n", task->tk_pid);
        rpc_free(task);
}

/*
 * Create a new task for the specified client.  We have to
 * clean up after an allocation failure, as the client may
 * have specified "oneshot".
 */
struct rpc_task *
rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
{
        struct rpc_task *task;

        task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task));
        if (!task)
                goto cleanup;

        rpc_init_task(task, clnt, callback, flags);

        /* Replace tk_release */
        task->tk_release = rpc_default_free_task;

        dprintk("RPC: %4d allocated task\n", task->tk_pid);
        task->tk_flags |= RPC_TASK_DYNAMIC;
out:
        return task;

cleanup:
        /* Check whether to release the client */
        if (clnt) {
                printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
                        atomic_read(&clnt->cl_users), clnt->cl_oneshot);
                atomic_inc(&clnt->cl_users); /* pretend we were used ... */
                rpc_release_client(clnt);
        }
        goto out;
}

void
rpc_release_task(struct rpc_task *task)
{
        dprintk("RPC: %4d release task\n", task->tk_pid);

#ifdef RPC_DEBUG
        if (task->tk_magic != 0xf00baa) {
                printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
                rpc_debug = ~0;
                rpc_show_tasks();
                return;
        }
#endif

        /* Remove from global task list */
        spin_lock(&rpc_sched_lock);
        list_del(&task->tk_task);
        spin_unlock(&rpc_sched_lock);

        /* Protect the execution below. */
        spin_lock_bh(&rpc_queue_lock);

        /* Disable timer to prevent zombie wakeup */
        __rpc_disable_timer(task);

        /* Remove from any wait queue we're still on */
        __rpc_remove_wait_queue(task);

        task->tk_active = 0;

        spin_unlock_bh(&rpc_queue_lock);

        /* Synchronously delete any running timer */
        rpc_delete_timer(task);

        /* Release resources */
        if (task->tk_rqstp)
                xprt_release(task);
        if (task->tk_msg.rpc_cred)
                rpcauth_unbindcred(task);
        if (task->tk_buffer) {
                rpc_free(task->tk_buffer);
                task->tk_buffer = NULL;
        }
        if (task->tk_client) {
                rpc_release_client(task->tk_client);
                task->tk_client = NULL;
        }

#ifdef RPC_DEBUG
        task->tk_magic = 0;
#endif
        if (task->tk_release)
                task->tk_release(task);
}

/**
 * rpc_find_parent - find the parent of a child task.
 * @child: child task
 *
 * Checks that the parent task is still sleeping on the
 * queue 'childq'. If so returns a pointer to the parent.
 * Upon failure returns NULL.
 *
 * Caller must hold rpc_queue_lock
 */
static inline struct rpc_task *
rpc_find_parent(struct rpc_task *child)
{
        struct rpc_task *task, *parent;
        struct list_head *le;

        parent = (struct rpc_task *) child->tk_calldata;
        task_for_each(task, le, &childq.tasks)
                if (task == parent)
                        return parent;

        return NULL;
}

static void
rpc_child_exit(struct rpc_task *child)
{
        struct rpc_task *parent;

        spin_lock_bh(&rpc_queue_lock);
        if ((parent = rpc_find_parent(child)) != NULL) {
                parent->tk_status = child->tk_status;
                __rpc_wake_up_task(parent);
        }
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Note: rpc_new_task releases the client after a failure.
 */
struct rpc_task *
rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
        struct rpc_task *task;

        task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
        if (!task)
                goto fail;
        task->tk_exit = rpc_child_exit;
        task->tk_calldata = parent;
        return task;

fail:
        parent->tk_status = -ENOMEM;
        return NULL;
}

void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
        spin_lock_bh(&rpc_queue_lock);
        /* N.B. Is it possible for the child to have already finished? */
        __rpc_sleep_on(&childq, task, func, NULL);
        rpc_schedule_run(child);
        spin_unlock_bh(&rpc_queue_lock);
}
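
/*
 * Parent/child pattern, in brief: a task that needs a helper RPC (the
 * classic example being a portmapper lookup, see net/sunrpc/pmap_clnt.c)
 * calls rpc_new_child() and then rpc_run_child(parent, child, callback).
 * The parent goes to sleep on childq with 'callback' as its wake-up
 * action, the child runs asynchronously, and when it exits
 * rpc_child_exit() copies its status into the parent and wakes it.
 */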

/*
 * Kill all tasks for the given client.
 * XXX: kill their descendants as well?
 */
void
rpc_killall_tasks(struct rpc_clnt *clnt)
{
        struct rpc_task *rovr;
        struct list_head *le;

        dprintk("RPC:      killing all tasks for client %p\n", clnt);

        /*
         * Spin lock all_tasks to prevent changes...
         */
        spin_lock(&rpc_sched_lock);
        alltask_for_each(rovr, le, &all_tasks)
                if (!clnt || rovr->tk_client == clnt) {
                        rovr->tk_flags |= RPC_TASK_KILLED;
                        rpc_exit(rovr, -EIO);
                        rpc_wake_up_task(rovr);
                }
        spin_unlock(&rpc_sched_lock);
}

static DECLARE_MUTEX_LOCKED(rpciod_running);

static inline int
rpciod_task_pending(void)
{
        return !list_empty(&schedq.tasks);
}


/*
 * This is the rpciod kernel thread
 */
static int
rpciod(void *ptr)
{
        wait_queue_head_t *assassin = (wait_queue_head_t*) ptr;
        int             rounds = 0;

        MOD_INC_USE_COUNT;
        lock_kernel();
        /*
         * Let our maker know we're running ...
         */
        rpciod_pid = current->pid;
        up(&rpciod_running);

        daemonize();

        spin_lock_irq(&current->sigmask_lock);
        siginitsetinv(&current->blocked, sigmask(SIGKILL));
        recalc_sigpending(current);
        spin_unlock_irq(&current->sigmask_lock);

        strcpy(current->comm, "rpciod");

        dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
        while (rpciod_users) {
                if (signalled()) {
                        rpciod_killall();
                        flush_signals(current);
                }
                __rpc_schedule();

                if (++rounds >= 64) {   /* safeguard */
                        schedule();
                        rounds = 0;
                }

                if (!rpciod_task_pending()) {
                        dprintk("RPC: rpciod back to sleep\n");
                        wait_event_interruptible(rpciod_idle, rpciod_task_pending());
                        dprintk("RPC: switch to rpciod\n");
                        rounds = 0;
                }
        }

        dprintk("RPC: rpciod shutdown commences\n");
        if (!list_empty(&all_tasks)) {
                printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
                rpciod_killall();
        }

        rpciod_pid = 0;
        wake_up(assassin);

        dprintk("RPC: rpciod exiting\n");
        MOD_DEC_USE_COUNT;
        return 0;
}

static void
rpciod_killall(void)
{
        unsigned long flags;

        while (!list_empty(&all_tasks)) {
                current->sigpending = 0;
                rpc_killall_tasks(NULL);
                __rpc_schedule();
                if (!list_empty(&all_tasks)) {
                        dprintk("rpciod_killall: waiting for tasks to exit\n");
                        yield();
                }
        }

        spin_lock_irqsave(&current->sigmask_lock, flags);
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
}

/*
 * Start up the rpciod process if it's not already running.
 */
int
rpciod_up(void)
{
        int error = 0;

        MOD_INC_USE_COUNT;
        down(&rpciod_sema);
        dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
        rpciod_users++;
        if (rpciod_pid)
                goto out;
        /*
         * If there's no pid, we should be the first user.
         */
        if (rpciod_users > 1)
                printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
        /*
         * Create the rpciod thread and wait for it to start.
         */
        error = kernel_thread(rpciod, &rpciod_killer, 0);
        if (error < 0) {
                printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
                rpciod_users--;
                goto out;
        }
        down(&rpciod_running);
        error = 0;
out:
        up(&rpciod_sema);
        MOD_DEC_USE_COUNT;
        return error;
}

void
rpciod_down(void)
{
        unsigned long flags;

        MOD_INC_USE_COUNT;
        down(&rpciod_sema);
        dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
        if (rpciod_users) {
                if (--rpciod_users)
                        goto out;
        } else
                printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);

        if (!rpciod_pid) {
                dprintk("rpciod_down: Nothing to do!\n");
                goto out;
        }

        kill_proc(rpciod_pid, SIGKILL, 1);
        /*
         * Usually rpciod will exit very quickly, so we
         * wait briefly before checking the process id.
         */
        current->sigpending = 0;
        yield();
        /*
         * Display a message if we're going to wait longer.
         */
        while (rpciod_pid) {
                dprintk("rpciod_down: waiting for pid %d to exit\n", rpciod_pid);
                if (signalled()) {
                        dprintk("rpciod_down: caught signal\n");
                        break;
                }
                interruptible_sleep_on(&rpciod_killer);
        }
        spin_lock_irqsave(&current->sigmask_lock, flags);
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
out:
        up(&rpciod_sema);
        MOD_DEC_USE_COUNT;
}
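
/*
 * Life cycle summary: rpciod is reference counted. Each rpciod_up() call
 * bumps rpciod_users and, for the first user, spawns the kernel thread
 * and waits on rpciod_running until it has announced its pid; each
 * rpciod_down() drops the count and, for the last user, sends SIGKILL and
 * waits on rpciod_killer until the thread has cleared rpciod_pid. In the
 * 2.4 tree the callers are (roughly) client creation and destruction in
 * net/sunrpc/clnt.c, but keeping the up/down calls balanced is the
 * caller's responsibility.
 */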

#ifdef RPC_DEBUG
void rpc_show_tasks(void)
{
        struct list_head *le;
        struct rpc_task *t;

        spin_lock(&rpc_sched_lock);
        if (list_empty(&all_tasks)) {
                spin_unlock(&rpc_sched_lock);
                return;
        }
        printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
                "-rpcwait -action- --exit--\n");
        alltask_for_each(t, le, &all_tasks)
                printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
                        t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status,
                        t->tk_client, t->tk_client->cl_prog,
                        t->tk_rqstp, t->tk_timeout,
                        t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
                        t->tk_action, t->tk_exit);
        spin_unlock(&rpc_sched_lock);
}
#endif