/*
 * linux/net/sunrpc/sched.c
 *
 * Scheduling for synchronous and asynchronous RPC requests.
 *
 * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
 *
 * TCP NFS related read + write fixes
 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
 */

#include <linux/module.h>

#define __KERNEL_SYSCALLS__
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>

#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>

#ifdef RPC_DEBUG
#define RPCDBG_FACILITY         RPCDBG_SCHED
static int                      rpc_task_id;
#endif

/*
 * We give RPC the same get_free_pages priority as NFS
 */
#define GFP_RPC                 GFP_NOFS

static void                     __rpc_default_timer(struct rpc_task *task);
static void                     rpciod_killall(void);

/*
 * When an asynchronous RPC task is activated within a bottom half
 * handler, or while executing another RPC task, it is put on
 * schedq, and rpciod is woken up.
 */
static RPC_WAITQ(schedq, "schedq");

/*
 * RPC tasks that create another task (e.g. for contacting the portmapper)
 * will wait on this queue for their child's completion
 */
static RPC_WAITQ(childq, "childq");

/*
 * RPC tasks sit here while waiting for conditions to improve.
 */
static RPC_WAITQ(delay_queue, "delayq");

/*
 * All RPC tasks are linked into this list
 */
static LIST_HEAD(all_tasks);

/*
 * rpciod-related stuff
 */
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_WAIT_QUEUE_HEAD(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema);
static unsigned int             rpciod_users;
static pid_t                    rpciod_pid;
static int                      rpc_inhibit;

/*
 * Spinlock for wait queues. Access to the latter also has to be
 * interrupt-safe in order to allow timers to wake up sleeping tasks.
 */
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/*
 * Spinlock for other critical sections of code.
 */
static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;

/*
 * This is the last-ditch buffer for NFS swap requests
 */
static u32                      swap_buffer[PAGE_SIZE >> 2];
static long                     swap_buffer_used;

/*
 * Make allocation of the swap_buffer SMP-safe
 */
static __inline__ int rpc_lock_swapbuf(void)
{
        return !test_and_set_bit(1, &swap_buffer_used);
}
static __inline__ void rpc_unlock_swapbuf(void)
{
        clear_bit(1, &swap_buffer_used);
}
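
/*
 * Note: the pair above is a simple try-lock built on an atomic bit in
 * swap_buffer_used. rpc_allocate() below hands out swap_buffer only when
 * rpc_lock_swapbuf() wins the bit, and rpc_free() releases it by clearing
 * the bit again when that address is passed back.
 */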

/*
 * Disable the timer for a given RPC task. Should be called with
 * rpc_queue_lock and bh_disabled in order to avoid races within
 * rpc_run_timer().
 */
static inline void
__rpc_disable_timer(struct rpc_task *task)
{
        dprintk("RPC: %4d disabling timer\n", task->tk_pid);
        task->tk_timeout_fn = NULL;
        task->tk_timeout = 0;
}

/*
 * Run a timeout function.
 * We use the callback in order to allow __rpc_wake_up_task()
 * and friends to disable the timer synchronously on SMP systems
 * without calling del_timer_sync(). The latter could cause a
 * deadlock if called while we're holding spinlocks...
 */
static void
rpc_run_timer(struct rpc_task *task)
{
        void (*callback)(struct rpc_task *);

        spin_lock_bh(&rpc_queue_lock);
        callback = task->tk_timeout_fn;
        task->tk_timeout_fn = NULL;
        spin_unlock_bh(&rpc_queue_lock);
        if (callback) {
                dprintk("RPC: %4d running timer\n", task->tk_pid);
                callback(task);
        }
}

/*
 * Set up a timer for the current task.
 */
static inline void
__rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
        if (!task->tk_timeout)
                return;

        dprintk("RPC: %4d setting alarm for %lu ms\n",
                        task->tk_pid, task->tk_timeout * 1000 / HZ);

        if (timer)
                task->tk_timeout_fn = timer;
        else
                task->tk_timeout_fn = __rpc_default_timer;
        mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
}

/*
 * Set up a timer for an already sleeping task.
 */
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
        spin_lock_bh(&rpc_queue_lock);
        if (!RPC_IS_RUNNING(task))
                __rpc_add_timer(task, timer);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Delete any timer for the current task. Because we use del_timer_sync(),
 * this function should never be called while holding rpc_queue_lock.
 */
static inline void
rpc_delete_timer(struct rpc_task *task)
{
        dprintk("RPC: %4d deleting timer\n", task->tk_pid);
        del_timer_sync(&task->tk_timer);
}
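
/*
 * Taken together, the helpers above give each rpc_task a per-task timer
 * with this life cycle: __rpc_add_timer() arms tk_timer and records the
 * handler in tk_timeout_fn; when the timer fires, rpc_run_timer()
 * snapshots and clears tk_timeout_fn under rpc_queue_lock before calling
 * it; __rpc_disable_timer() cancels a pending handler cheaply (again
 * under the lock) without touching the kernel timer itself; and
 * rpc_delete_timer() does the synchronous teardown once no spinlocks are
 * held.
 */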

/*
 * Add new request to wait queue.
 *
 * Swapper tasks always get inserted at the head of the queue.
 * This should avoid many nasty memory deadlocks and hopefully
 * improve overall performance.
 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
 */
static inline int
__rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
        if (task->tk_rpcwait == queue)
                return 0;

        if (task->tk_rpcwait) {
                printk(KERN_WARNING "RPC: doubly enqueued task!\n");
                return -EWOULDBLOCK;
        }
        if (RPC_IS_SWAPPER(task))
                list_add(&task->tk_list, &queue->tasks);
        else
                list_add_tail(&task->tk_list, &queue->tasks);
        task->tk_rpcwait = queue;

        dprintk("RPC: %4d added to queue %p \"%s\"\n",
                                task->tk_pid, queue, rpc_qname(queue));

        return 0;
}

int
rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
        int             result;

        spin_lock_bh(&rpc_queue_lock);
        result = __rpc_add_wait_queue(q, task);
        spin_unlock_bh(&rpc_queue_lock);
        return result;
}

/*
 * Remove request from queue.
 * Note: must be called with spin lock held.
 */
static inline void
__rpc_remove_wait_queue(struct rpc_task *task)
{
        struct rpc_wait_queue *queue = task->tk_rpcwait;

        if (!queue)
                return;

        list_del(&task->tk_list);
        task->tk_rpcwait = NULL;

        dprintk("RPC: %4d removed from queue %p \"%s\"\n",
                                task->tk_pid, queue, rpc_qname(queue));
}

void
rpc_remove_wait_queue(struct rpc_task *task)
{
        if (!task->tk_rpcwait)
                return;
        spin_lock_bh(&rpc_queue_lock);
        __rpc_remove_wait_queue(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Make an RPC task runnable.
 *
 * Note: If the task is ASYNC, this must be called with
 * the spinlock held to protect the wait queue operation.
 */
static inline void
rpc_make_runnable(struct rpc_task *task)
{
        if (task->tk_timeout_fn) {
                printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
                return;
        }
        rpc_set_running(task);
        if (RPC_IS_ASYNC(task)) {
                if (RPC_IS_SLEEPING(task)) {
                        int status;
                        status = __rpc_add_wait_queue(&schedq, task);
                        if (status < 0) {
                                printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
                                task->tk_status = status;
                                return;
                        }
                        rpc_clear_sleeping(task);
                        if (waitqueue_active(&rpciod_idle))
                                wake_up(&rpciod_idle);
                }
        } else {
                rpc_clear_sleeping(task);
                if (waitqueue_active(&task->tk_wait))
                        wake_up(&task->tk_wait);
        }
}
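
/*
 * The two branches above are the heart of the scheduler's wake-up model:
 * an async task is made runnable by moving it onto schedq and kicking
 * rpciod (which drains schedq via __rpc_schedule() below), while a sync
 * task is woken by waking the process sleeping in __rpc_execute() on the
 * task's private tk_wait queue.
 */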

/*
 * Place a newly initialized task on the schedq.
 */
static inline void
rpc_schedule_run(struct rpc_task *task)
{
        /* Don't run a child twice! */
        if (RPC_IS_ACTIVATED(task))
                return;
        task->tk_active = 1;
        rpc_set_sleeping(task);
        rpc_make_runnable(task);
}

/*
 *      For other people who may need to wake the I/O daemon
 *      but should (for now) know nothing about its innards
 */
void rpciod_wake_up(void)
{
        if(rpciod_pid==0)
                printk(KERN_ERR "rpciod: wot no daemon?\n");
        if (waitqueue_active(&rpciod_idle))
                wake_up(&rpciod_idle);
}

/*
 * Prepare for sleeping on a wait queue.
 * By always appending tasks to the list we ensure FIFO behavior.
 * NB: An RPC task will only receive interrupt-driven events as long
 * as it's on a wait queue.
 */
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                        rpc_action action, rpc_action timer)
{
        int status;

        dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
                                rpc_qname(q), jiffies);

        if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
                printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
                return;
        }

        /* Mark the task as being activated if so needed */
        if (!RPC_IS_ACTIVATED(task)) {
                task->tk_active = 1;
                rpc_set_sleeping(task);
        }

        status = __rpc_add_wait_queue(q, task);
        if (status) {
                printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
                task->tk_status = status;
        } else {
                rpc_clear_running(task);
                if (task->tk_callback) {
                        dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
                        BUG();
                }
                task->tk_callback = action;
                __rpc_add_timer(task, timer);
        }
}

void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                                rpc_action action, rpc_action timer)
{
        /*
         * Protect the queue operations.
         */
        spin_lock_bh(&rpc_queue_lock);
        __rpc_sleep_on(q, task, action, timer);
        spin_unlock_bh(&rpc_queue_lock);
}
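
/*
 * Typical usage sketch (illustrative only, not part of this file): a
 * state function in the transport or client code puts the task to sleep
 * with a wake-up callback and an optional timeout handler, and an
 * interrupt or bottom-half path later wakes it, roughly:
 *
 *      task->tk_timeout = some_timeout;
 *      rpc_sleep_on(&some_waitq, task, some_callback, some_timer_fn);
 *      ...
 *      rpc_wake_up_task(task);         (e.g. from a data-ready handler)
 *
 * The queue, callback, timer function and timeout above are placeholders;
 * the real call sites live in net/sunrpc/xprt.c and clnt.c.
 */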

/**
 * __rpc_wake_up_task - wake up a single rpc_task
 * @task: task to be woken up
 *
 * Caller must hold rpc_queue_lock
 */
static void
__rpc_wake_up_task(struct rpc_task *task)
{
        dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
                                        task->tk_pid, jiffies, rpc_inhibit);

#ifdef RPC_DEBUG
        if (task->tk_magic != 0xf00baa) {
                printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
                rpc_debug = ~0;
                rpc_show_tasks();
                return;
        }
#endif
        /* Has the task been executed yet? If not, we cannot wake it up! */
        if (!RPC_IS_ACTIVATED(task)) {
                printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
                return;
        }
        if (RPC_IS_RUNNING(task))
                return;

        __rpc_disable_timer(task);
        if (task->tk_rpcwait != &schedq)
                __rpc_remove_wait_queue(task);

        rpc_make_runnable(task);

        dprintk("RPC:      __rpc_wake_up_task done\n");
}

/*
 * Default timeout handler if none specified by user
 */
static void
__rpc_default_timer(struct rpc_task *task)
{
        dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
        task->tk_status = -ETIMEDOUT;
        rpc_wake_up_task(task);
}

/*
 * Wake up the specified task
 */
void
rpc_wake_up_task(struct rpc_task *task)
{
        if (RPC_IS_RUNNING(task))
                return;
        spin_lock_bh(&rpc_queue_lock);
        __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Wake up the next task on the wait queue.
 */
struct rpc_task *
rpc_wake_up_next(struct rpc_wait_queue *queue)
{
        struct rpc_task *task = NULL;

        dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
        spin_lock_bh(&rpc_queue_lock);
        task_for_first(task, &queue->tasks)
                __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);

        return task;
}

/**
 * rpc_wake_up - wake up all rpc_tasks
 * @queue: rpc_wait_queue on which the tasks are sleeping
 *
 * Grabs rpc_queue_lock
 */
void
rpc_wake_up(struct rpc_wait_queue *queue)
{
        struct rpc_task *task;

        spin_lock_bh(&rpc_queue_lock);
        while (!list_empty(&queue->tasks))
                task_for_first(task, &queue->tasks)
                        __rpc_wake_up_task(task);
        spin_unlock_bh(&rpc_queue_lock);
}

/**
 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
 * @queue: rpc_wait_queue on which the tasks are sleeping
 * @status: status value to set
 *
 * Grabs rpc_queue_lock
 */
void
rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
        struct rpc_task *task;

        spin_lock_bh(&rpc_queue_lock);
        while (!list_empty(&queue->tasks)) {
                task_for_first(task, &queue->tasks) {
                        task->tk_status = status;
                        __rpc_wake_up_task(task);
                }
        }
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Run a task at a later time
 */
static void     __rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
        task->tk_timeout = delay;
        rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}

static void
__rpc_atrun(struct rpc_task *task)
{
        task->tk_status = 0;
        rpc_wake_up_task(task);
}
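
/*
 * Illustrative use of rpc_delay() (not from this file): a state function
 * that wants to back off and retry can do something like
 *
 *      rpc_delay(task, HZ >> 4);
 *      task->tk_action = next_state_fn;
 *
 * i.e. sleep on delay_queue for the given number of jiffies, after which
 * __rpc_atrun() clears tk_status and makes the task runnable again. The
 * delay value and next_state_fn above are placeholders for whatever the
 * caller needs.
 */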

/*
 * This is the RPC `scheduler' (or rather, the finite state machine).
 */
static int
__rpc_execute(struct rpc_task *task)
{
        int             status = 0;

        dprintk("RPC: %4d rpc_execute flgs %x\n",
                                task->tk_pid, task->tk_flags);

        if (!RPC_IS_RUNNING(task)) {
                printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
                return 0;
        }

 restarted:
        while (1) {
                /*
                 * Execute any pending callback.
                 */
                if (RPC_DO_CALLBACK(task)) {
                        /* Define a callback save pointer */
                        void (*save_callback)(struct rpc_task *);

                        /*
                         * If a callback exists, save it, reset it,
                         * call it.
                         * The save is needed to stop from resetting
                         * another callback set within the callback handler
                         * - Dave
                         */
                        save_callback=task->tk_callback;
                        task->tk_callback=NULL;
                        save_callback(task);
                }

                /*
                 * Perform the next FSM step.
                 * tk_action may be NULL when the task has been killed
                 * by someone else.
                 */
                if (RPC_IS_RUNNING(task)) {
                        /*
                         * Garbage collection of pending timers...
                         */
                        rpc_delete_timer(task);
                        if (!task->tk_action)
                                break;
                        task->tk_action(task);
                }

                /*
                 * Check whether task is sleeping.
                 */
                spin_lock_bh(&rpc_queue_lock);
                if (!RPC_IS_RUNNING(task)) {
                        rpc_set_sleeping(task);
                        if (RPC_IS_ASYNC(task)) {
                                spin_unlock_bh(&rpc_queue_lock);
                                return 0;
                        }
                }
                spin_unlock_bh(&rpc_queue_lock);

                while (RPC_IS_SLEEPING(task)) {
                        /* sync task: sleep here */
                        dprintk("RPC: %4d sync task going to sleep\n",
                                                        task->tk_pid);
                        if (current->pid == rpciod_pid)
                                printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");

                        __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
                        dprintk("RPC: %4d sync task resuming\n", task->tk_pid);

                        /*
                         * When a sync task receives a signal, it exits with
                         * -ERESTARTSYS. In order to catch any callbacks that
                         * clean up after sleeping on some queue, we don't
                         * break the loop here, but go around once more.
                         */
                        if (task->tk_client->cl_intr && signalled()) {
                                dprintk("RPC: %4d got signal\n", task->tk_pid);
                                task->tk_flags |= RPC_TASK_KILLED;
                                rpc_exit(task, -ERESTARTSYS);
                                rpc_wake_up_task(task);
                        }
                }
        }

        if (task->tk_exit) {
                task->tk_exit(task);
                /* If tk_action is non-null, the user wants us to restart */
                if (task->tk_action) {
                        if (!RPC_ASSASSINATED(task)) {
                                /* Release RPC slot and buffer memory */
                                if (task->tk_rqstp)
                                        xprt_release(task);
                                if (task->tk_buffer) {
                                        rpc_free(task->tk_buffer);
                                        task->tk_buffer = NULL;
                                }
                                goto restarted;
                        }
                        printk(KERN_ERR "RPC: dead task tries to walk away.\n");
                }
        }

        dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
        status = task->tk_status;

        /* Release all resources associated with the task */
        rpc_release_task(task);

        return status;
}

/*
 * User-visible entry point to the scheduler.
 *
 * This may be called recursively if e.g. an async NFS task updates
 * the attributes and finds that dirty pages must be flushed.
 * NOTE: Upon exit of this function the task is guaranteed to be
 *       released. In particular note that tk_release() will have
 *       been called, so your task memory may have been freed.
 */
int
rpc_execute(struct rpc_task *task)
{
        int status = -EIO;
        if (rpc_inhibit) {
                printk(KERN_INFO "RPC: execution inhibited!\n");
                goto out_release;
        }

        status = -EWOULDBLOCK;
        if (task->tk_active) {
                printk(KERN_ERR "RPC: active task was run twice!\n");
                goto out_err;
        }

        task->tk_active = 1;
        rpc_set_running(task);
        return __rpc_execute(task);
 out_release:
        rpc_release_task(task);
 out_err:
        return status;
}
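
/*
 * Usage sketch (hedged, for orientation only): a synchronous caller
 * typically builds a task and drives it through rpc_execute() roughly
 * like this:
 *
 *      struct rpc_task task;
 *
 *      rpc_init_task(&task, clnt, NULL, flags);
 *      rpc_call_setup(&task, &msg, 0);
 *      status = rpc_execute(&task);
 *
 * rpc_call_setup() lives in clnt.c, not here, and the exact sequence
 * belongs to the caller. The key contract, stated in the comment above,
 * is that the task has always been released by the time rpc_execute()
 * returns.
 */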

/*
 * This is our own little scheduler for async RPC tasks.
 */
static void
__rpc_schedule(void)
{
        struct rpc_task *task;
        int             count = 0;

        dprintk("RPC:      rpc_schedule enter\n");
        while (1) {
                spin_lock_bh(&rpc_queue_lock);

                task_for_first(task, &schedq.tasks) {
                        __rpc_remove_wait_queue(task);
                        spin_unlock_bh(&rpc_queue_lock);

                        __rpc_execute(task);
                } else {
                        spin_unlock_bh(&rpc_queue_lock);
                        break;
                }

                if (++count >= 200 || current->need_resched) {
                        count = 0;
                        schedule();
                }
        }
        dprintk("RPC:      rpc_schedule leave\n");
}

/*
 * Allocate memory for RPC purpose.
 *
 * This is yet another tricky issue: For sync requests issued by
 * a user process, we want to make kmalloc sleep if there isn't
 * enough memory. Async requests should not sleep too excessively
 * because that will block rpciod (but that's not dramatic when
 * it's starved of memory anyway). Finally, swapout requests should
 * never sleep at all, and should not trigger another swap_out
 * request through kmalloc which would just increase memory contention.
 *
 * I hope the following gets it right, which gives async requests
 * a slight advantage over sync requests (good for writeback, debatable
 * for readahead):
 *
 *   sync user requests:        GFP_KERNEL
 *   async requests:            GFP_RPC         (== GFP_NOFS)
 *   swap requests:             GFP_ATOMIC      (or new GFP_SWAPPER)
 */
void *
rpc_allocate(unsigned int flags, unsigned int size)
{
        u32     *buffer;
        int     gfp;

        if (flags & RPC_TASK_SWAPPER)
                gfp = GFP_ATOMIC;
        else if (flags & RPC_TASK_ASYNC)
                gfp = GFP_RPC;
        else
                gfp = GFP_KERNEL;

        do {
                if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
                        dprintk("RPC:      allocated buffer %p\n", buffer);
                        return buffer;
                }
                if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
                    && rpc_lock_swapbuf()) {
                        dprintk("RPC:      used last-ditch swap buffer\n");
                        return swap_buffer;
                }
                if (flags & RPC_TASK_ASYNC)
                        return NULL;
                yield();
        } while (!signalled());

        return NULL;
}

void
rpc_free(void *buffer)
{
        if (buffer != swap_buffer) {
                kfree(buffer);
                return;
        }
        rpc_unlock_swapbuf();
}
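
/*
 * Note: anything obtained from rpc_allocate() must be returned through
 * rpc_free(), never plain kfree(), because the returned pointer may be
 * the static swap_buffer rather than a kmalloc'd block; rpc_free() tells
 * the two apart and releases the swap-buffer bit lock in the latter case.
 */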

/*
 * Creation and deletion of RPC task structures
 */
inline void
rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
                                rpc_action callback, int flags)
{
        memset(task, 0, sizeof(*task));
        init_timer(&task->tk_timer);
        task->tk_timer.data     = (unsigned long) task;
        task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
        task->tk_client = clnt;
        task->tk_flags  = flags;
        task->tk_exit   = callback;
        init_waitqueue_head(&task->tk_wait);
        if (current->uid != current->fsuid || current->gid != current->fsgid)
                task->tk_flags |= RPC_TASK_SETUID;

        /* Initialize retry counters */
        task->tk_garb_retry = 2;
        task->tk_cred_retry = 2;
        task->tk_suid_retry = 1;

        /* Add to global list of all tasks */
        spin_lock(&rpc_sched_lock);
        list_add(&task->tk_task, &all_tasks);
        spin_unlock(&rpc_sched_lock);

        if (clnt)
                atomic_inc(&clnt->cl_users);

#ifdef RPC_DEBUG
        task->tk_magic = 0xf00baa;
        task->tk_pid = rpc_task_id++;
#endif
        dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
                                current->pid);
}

static void
rpc_default_free_task(struct rpc_task *task)
{
        dprintk("RPC: %4d freeing task\n", task->tk_pid);
        rpc_free(task);
}

/*
 * Create a new task for the specified client.  We have to
 * clean up after an allocation failure, as the client may
 * have specified "oneshot".
 */
struct rpc_task *
rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
{
        struct rpc_task *task;

        task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task));
        if (!task)
                goto cleanup;

        rpc_init_task(task, clnt, callback, flags);

        /* Replace tk_release */
        task->tk_release = rpc_default_free_task;

        dprintk("RPC: %4d allocated task\n", task->tk_pid);
        task->tk_flags |= RPC_TASK_DYNAMIC;
out:
        return task;

cleanup:
        /* Check whether to release the client */
        if (clnt) {
                printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
                        atomic_read(&clnt->cl_users), clnt->cl_oneshot);
                atomic_inc(&clnt->cl_users); /* pretend we were used ... */
                rpc_release_client(clnt);
        }
        goto out;
}

void
rpc_release_task(struct rpc_task *task)
{
        dprintk("RPC: %4d release task\n", task->tk_pid);

#ifdef RPC_DEBUG
        if (task->tk_magic != 0xf00baa) {
                printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
                rpc_debug = ~0;
                rpc_show_tasks();
                return;
        }
#endif

        /* Remove from global task list */
        spin_lock(&rpc_sched_lock);
        list_del(&task->tk_task);
        spin_unlock(&rpc_sched_lock);

        /* Protect the execution below. */
        spin_lock_bh(&rpc_queue_lock);

        /* Disable timer to prevent zombie wakeup */
        __rpc_disable_timer(task);

        /* Remove from any wait queue we're still on */
        __rpc_remove_wait_queue(task);

        task->tk_active = 0;

        spin_unlock_bh(&rpc_queue_lock);

        /* Synchronously delete any running timer */
        rpc_delete_timer(task);

        /* Release resources */
        if (task->tk_rqstp)
                xprt_release(task);
        if (task->tk_msg.rpc_cred)
                rpcauth_unbindcred(task);
        if (task->tk_buffer) {
                rpc_free(task->tk_buffer);
                task->tk_buffer = NULL;
        }
        if (task->tk_client) {
                rpc_release_client(task->tk_client);
                task->tk_client = NULL;
        }

#ifdef RPC_DEBUG
        task->tk_magic = 0;
#endif
        if (task->tk_release)
                task->tk_release(task);
}

/**
 * rpc_find_parent - find the parent of a child task.
 * @child: child task
 *
 * Checks that the parent task is still sleeping on the
 * queue 'childq'. If so returns a pointer to the parent.
 * Upon failure returns NULL.
 *
 * Caller must hold rpc_queue_lock
 */
static inline struct rpc_task *
rpc_find_parent(struct rpc_task *child)
{
        struct rpc_task *task, *parent;
        struct list_head *le;

        parent = (struct rpc_task *) child->tk_calldata;
        task_for_each(task, le, &childq.tasks)
                if (task == parent)
                        return parent;

        return NULL;
}

static void
rpc_child_exit(struct rpc_task *child)
{
        struct rpc_task *parent;

        spin_lock_bh(&rpc_queue_lock);
        if ((parent = rpc_find_parent(child)) != NULL) {
                parent->tk_status = child->tk_status;
                __rpc_wake_up_task(parent);
        }
        spin_unlock_bh(&rpc_queue_lock);
}

/*
 * Note: rpc_new_task releases the client after a failure.
 */
struct rpc_task *
rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
        struct rpc_task *task;

        task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
        if (!task)
                goto fail;
        task->tk_exit = rpc_child_exit;
        task->tk_calldata = parent;
        return task;

fail:
        parent->tk_status = -ENOMEM;
        return NULL;
}

void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
        spin_lock_bh(&rpc_queue_lock);
        /* N.B. Is it possible for the child to have already finished? */
        __rpc_sleep_on(&childq, task, func, NULL);
        rpc_schedule_run(child);
        spin_unlock_bh(&rpc_queue_lock);
}
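
/*
 * Parent/child pattern, in brief: a task that needs a helper RPC (the
 * classic example being a portmapper lookup, see net/sunrpc/pmap_clnt.c)
 * calls rpc_new_child() and then rpc_run_child(parent, child, callback).
 * The parent goes to sleep on childq with 'callback' as its wake-up
 * action, the child runs asynchronously, and when it exits
 * rpc_child_exit() copies its status into the parent and wakes it.
 */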

/*
 * Kill all tasks for the given client.
 * XXX: kill their descendants as well?
 */
void
rpc_killall_tasks(struct rpc_clnt *clnt)
{
        struct rpc_task *rovr;
        struct list_head *le;

        dprintk("RPC:      killing all tasks for client %p\n", clnt);

        /*
         * Spin lock all_tasks to prevent changes...
         */
        spin_lock(&rpc_sched_lock);
        alltask_for_each(rovr, le, &all_tasks)
                if (!clnt || rovr->tk_client == clnt) {
                        rovr->tk_flags |= RPC_TASK_KILLED;
                        rpc_exit(rovr, -EIO);
                        rpc_wake_up_task(rovr);
                }
        spin_unlock(&rpc_sched_lock);
}

static DECLARE_MUTEX_LOCKED(rpciod_running);

static inline int
rpciod_task_pending(void)
{
        return !list_empty(&schedq.tasks);
}


/*
 * This is the rpciod kernel thread
 */
static int
rpciod(void *ptr)
{
        wait_queue_head_t *assassin = (wait_queue_head_t*) ptr;
        int             rounds = 0;

        MOD_INC_USE_COUNT;
        lock_kernel();
        /*
         * Let our maker know we're running ...
         */
        rpciod_pid = current->pid;
        up(&rpciod_running);

        daemonize();

        spin_lock_irq(&current->sigmask_lock);
        siginitsetinv(&current->blocked, sigmask(SIGKILL));
        recalc_sigpending(current);
        spin_unlock_irq(&current->sigmask_lock);

        strcpy(current->comm, "rpciod");

        dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
        while (rpciod_users) {
                if (signalled()) {
                        rpciod_killall();
                        flush_signals(current);
                }
                __rpc_schedule();

                if (++rounds >= 64) {   /* safeguard */
                        schedule();
                        rounds = 0;
                }

                if (!rpciod_task_pending()) {
                        dprintk("RPC: rpciod back to sleep\n");
                        wait_event_interruptible(rpciod_idle, rpciod_task_pending());
                        dprintk("RPC: switch to rpciod\n");
                        rounds = 0;
                }
        }

        dprintk("RPC: rpciod shutdown commences\n");
        if (!list_empty(&all_tasks)) {
                printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
                rpciod_killall();
        }

        rpciod_pid = 0;
        wake_up(assassin);

        dprintk("RPC: rpciod exiting\n");
        MOD_DEC_USE_COUNT;
        return 0;
}

static void
rpciod_killall(void)
{
        unsigned long flags;

        while (!list_empty(&all_tasks)) {
                current->sigpending = 0;
                rpc_killall_tasks(NULL);
                __rpc_schedule();
                if (!list_empty(&all_tasks)) {
                        dprintk("rpciod_killall: waiting for tasks to exit\n");
                        yield();
                }
        }

        spin_lock_irqsave(&current->sigmask_lock, flags);
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
}

/*
 * Start up the rpciod process if it's not already running.
 */
int
rpciod_up(void)
{
        int error = 0;

        MOD_INC_USE_COUNT;
        down(&rpciod_sema);
        dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
        rpciod_users++;
        if (rpciod_pid)
                goto out;
        /*
         * If there's no pid, we should be the first user.
         */
        if (rpciod_users > 1)
                printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
        /*
         * Create the rpciod thread and wait for it to start.
         */
        error = kernel_thread(rpciod, &rpciod_killer, 0);
        if (error < 0) {
                printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
                rpciod_users--;
                goto out;
        }
        down(&rpciod_running);
        error = 0;
out:
        up(&rpciod_sema);
        MOD_DEC_USE_COUNT;
        return error;
}

void
rpciod_down(void)
{
        unsigned long flags;

        MOD_INC_USE_COUNT;
        down(&rpciod_sema);
        dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
        if (rpciod_users) {
                if (--rpciod_users)
                        goto out;
        } else
                printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);

        if (!rpciod_pid) {
                dprintk("rpciod_down: Nothing to do!\n");
                goto out;
        }

        kill_proc(rpciod_pid, SIGKILL, 1);
        /*
         * Usually rpciod will exit very quickly, so we
         * wait briefly before checking the process id.
         */
        current->sigpending = 0;
        yield();
        /*
         * Display a message if we're going to wait longer.
         */
        while (rpciod_pid) {
                dprintk("rpciod_down: waiting for pid %d to exit\n", rpciod_pid);
                if (signalled()) {
                        dprintk("rpciod_down: caught signal\n");
                        break;
                }
                interruptible_sleep_on(&rpciod_killer);
        }
        spin_lock_irqsave(&current->sigmask_lock, flags);
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);
out:
        up(&rpciod_sema);
        MOD_DEC_USE_COUNT;
}
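
/*
 * Life cycle summary: rpciod is reference counted. Each rpciod_up() call
 * bumps rpciod_users and, for the first user, spawns the kernel thread
 * and waits on rpciod_running until it has announced its pid; each
 * rpciod_down() drops the count and, for the last user, sends SIGKILL and
 * waits on rpciod_killer until the thread has cleared rpciod_pid. In the
 * 2.4 tree the callers are (roughly) client creation and destruction in
 * net/sunrpc/clnt.c, but keeping the up/down calls balanced is the
 * caller's responsibility.
 */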

#ifdef RPC_DEBUG
void rpc_show_tasks(void)
{
        struct list_head *le;
        struct rpc_task *t;

        spin_lock(&rpc_sched_lock);
        if (list_empty(&all_tasks)) {
                spin_unlock(&rpc_sched_lock);
                return;
        }
        printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
                "-rpcwait -action- --exit--\n");
        alltask_for_each(t, le, &all_tasks)
                printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
                        t->tk_pid, t->tk_msg.rpc_proc, t->tk_flags, t->tk_status,
                        t->tk_client, t->tk_client->cl_prog,
                        t->tk_rqstp, t->tk_timeout,
                        t->tk_rpcwait ? rpc_qname(t->tk_rpcwait) : " <NULL> ",
                        t->tk_action, t->tk_exit);
        spin_unlock(&rpc_sched_lock);
}
#endif