/*
 *  linux/drivers/block/ll_rw_blk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
 *
 * This handles all read/write requests to block devices
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/config.h>
#include <linux/locks.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/completion.h>
#include <linux/bootmem.h>

#include <asm/system.h>

#include <linux/blk.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/module.h>
/*
 * MAC Floppy IWM hooks
 */

#ifdef CONFIG_MAC_FLOPPY_IWM
extern int mac_floppy_init(void);
#endif
/*
 * For the allocated request tables
 */
static kmem_cache_t *request_cachep;

/*
 * The "disk" task queue is used to start the actual requests
 */
DECLARE_TASK_QUEUE(tq_disk);
/*
 * Protect the request list against multiple users..
 *
 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
 * from the IRQ event side, and almost 100% SMP threaded from the syscall
 * side (we still have to protect against block device array operations, and
 * the do_request() side is casually still unsafe. The kernel lock protects
 * this part currently.).
 *
 * There is a fair chance that things will work just OK if these functions
 * are called with no global kernel lock held ...
 */
spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
/* This specifies how many sectors to read ahead on the disk. */
int read_ahead[MAX_BLKDEV];

struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
/*
 * blk_size contains the size of all block-devices in units of
 * 1024-byte blocks:
 *
 * blk_size[MAJOR][MINOR]
 *
 * if (!blk_size[MAJOR]) then no minor size checking is done.
 */
int * blk_size[MAX_BLKDEV];
/*
 * blksize_size contains the size of all block-devices:
 *
 * blksize_size[MAJOR][MINOR]
 *
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 */
int * blksize_size[MAX_BLKDEV];
/*
 * hardsect_size contains the size of the hardware sector of a device.
 *
 * hardsect_size[MAJOR][MINOR]
 *
 * if (!hardsect_size[MAJOR])
 *		then 512 bytes is assumed.
 * else
 *		sector_size is hardsect_size[MAJOR][MINOR]
 *
 * This is currently set by some scsi devices and read by the msdos fs driver.
 * Other uses may appear later.
 */
int * hardsect_size[MAX_BLKDEV];
/*
 * The following tunes the read-ahead algorithm in mm/filemap.c
 */
int * max_readahead[MAX_BLKDEV];

/*
 * Max number of sectors per request
 */
int * max_sectors[MAX_BLKDEV];
unsigned long blk_max_low_pfn, blk_max_pfn;
int blk_nohighio = 0;
static inline int get_max_sectors(kdev_t dev)
{
	if (!max_sectors[MAJOR(dev)])
		return MAX_SECTORS;
	return max_sectors[MAJOR(dev)][MINOR(dev)];
}
inline request_queue_t *blk_get_queue(kdev_t dev)
{
	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
	if (bdev->queue)
		return bdev->queue(dev);
	else
		return &blk_dev[MAJOR(dev)].request_queue;
}
static int __blk_cleanup_queue(struct request_list *list)
{
	struct list_head *head = &list->free;
	struct request *rq;
	int i = 0;

	while (!list_empty(head)) {
		rq = list_entry(head->next, struct request, queue);
		list_del(&rq->queue);
		kmem_cache_free(request_cachep, rq);
		i++;
	}
	if (i != list->count)
		printk("request list leak!\n");
	return i;
}
/**
 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
 * @q: the request queue to be released
 *
 * blk_cleanup_queue is the pair to blk_init_queue().  It should
 * be called when a request queue is being released; typically
 * when a block device is being de-registered.  Currently, its
 * primary task is to free all the &struct request structures that
 * were allocated to the queue.
 *
 * Hopefully the low level driver will have finished any
 * outstanding requests first...
 **/
void blk_cleanup_queue(request_queue_t * q)
{
	int count = q->nr_requests;

	count -= __blk_cleanup_queue(&q->rq[READ]);
	count -= __blk_cleanup_queue(&q->rq[WRITE]);
	if (count)
		printk("blk_cleanup_queue: leaked requests (%d)\n", count);

	memset(q, 0, sizeof(*q));
}
/**
 * blk_queue_headactive - indicate whether head of request queue may be active
 * @q: The queue which this applies to.
 * @active: A flag indicating whether the head of the queue is active.
 *
 * The driver for a block device may choose to leave the currently active
 * request on the request queue, removing it only when it has completed.
 * The queue handling routines assume this by default for safety reasons
 * and will not involve the head of the request queue in any merging or
 * reordering of requests when the queue is unplugged (and thus may be
 * working on this particular request).
 *
 * If a driver removes requests from the queue before processing them, then
 * it may indicate that it does so, thereby allowing the head of the queue
 * to be involved in merging and reordering.  This is done by calling
 * blk_queue_headactive() with an @active flag of %0.
 *
 * If a driver processes several requests at once, it must remove them (or
 * at least all but one of them) from the request queue.
 *
 * When a queue is plugged the head will be assumed to be inactive.
 **/
void blk_queue_headactive(request_queue_t * q, int active)
{
	q->head_active = active;
}
/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q: the request queue for the device to be affected
 * @mfn: the alternate make_request function
 *
 * The normal way for &struct buffer_heads to be passed to a device
 * driver is for them to be collected into requests on a request
 * queue, and then to allow the device driver to select requests
 * off that queue when it is ready.  This works well for many block
 * devices. However some block devices (typically virtual devices
 * such as md or lvm) do not benefit from the processing on the
 * request queue, and are served best by having the requests passed
 * directly to them.  This can be achieved by providing a function
 * to blk_queue_make_request().
 *
 * The driver that does this *must* be able to deal appropriately
 * with buffers in "highmemory", either by calling bh_kmap() to get
 * a kernel mapping, or by calling create_bounce() to create a
 * buffer in normal memory.
 **/
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	q->make_request_fn = mfn;
}
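
/*
 * Illustrative sketch (not part of the original file): how a stacking
 * driver such as md or lvm might use blk_queue_make_request().  The
 * function and variable names below are hypothetical, and the remapping
 * rule is invented for the example.
 */
#if 0
static int stackdev_make_request(request_queue_t *q, int rw,
				 struct buffer_head *bh)
{
	/* redirect the buffer to the underlying device */
	bh->b_rdev = stackdev_real_dev;		/* hypothetical kdev_t */
	bh->b_rsector += stackdev_start_sector;	/* hypothetical offset */

	/* returning 1 asks generic_make_request() to resubmit the buffer
	 * head to the queue of the device now named in b_rdev */
	return 1;
}

	/* during driver initialisation: */
	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), stackdev_make_request);
#endif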
/**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q: the request queue for the device
 * @dma_addr: bus address limit
 *
 * Different hardware can have different requirements as to what pages
 * it can do I/O directly to. A low level driver can call
 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
 * buffers for doing I/O to pages residing above @dma_addr. By default
 * the block layer sets this to the highest numbered "low" memory page.
 **/
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
{
	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
	unsigned long mb = dma_addr >> 20;
	static request_queue_t *old_q;

	/*
	 * keep this for debugging for now...
	 */
	if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
		old_q = q;
		printk("blk: queue %p, ", q);
		if (dma_addr == BLK_BOUNCE_ANY)
			printk("no I/O memory limit\n");
		else
			printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
			       (long long) dma_addr);
	}

	q->bounce_pfn = bounce_pfn;
}
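
/*
 * Illustrative sketch (not part of the original file): a driver for a
 * controller that can only address the first 16MB of memory could ask for
 * bounce buffers below that mark.  The queue lookup and the 16MB mask are
 * assumptions made for the example.
 */
#if 0
	request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);

	/* any buffer above 16MB is bounced into low memory before I/O */
	blk_queue_bounce_limit(q, 0x00ffffffULL);
#endif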
/* can we merge the two segments, or do we need to start a new one? */
inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
{
	/* if bh and nxt are contiguous and don't cross a 4g boundary, it's ok */
	if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
		return 1;
	return 0;
}
static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
{
	if (req->nr_segments < max_segments) {
		req->nr_segments++;
		return 1;
	}
	return 0;
}

static int ll_back_merge_fn(request_queue_t *q, struct request *req,
			    struct buffer_head *bh, int max_segments)
{
	if (blk_seg_merge_ok(req->bhtail, bh))
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_front_merge_fn(request_queue_t *q, struct request *req,
			     struct buffer_head *bh, int max_segments)
{
	if (blk_seg_merge_ok(bh, req->bh))
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
				struct request *next, int max_segments)
{
	int total_segments = req->nr_segments + next->nr_segments;

	if (blk_seg_merge_ok(req->bhtail, next->bh))
		total_segments--;
	if (total_segments > max_segments)
		return 0;
	req->nr_segments = total_segments;
	return 1;
}
337 * "plug" the device if there are no outstanding requests: this will
338 * force the transfer to start only after we have put all the requests
341 * This is called with interrupts off and no requests on the queue.
342 * (and with the request spinlock acquired)
344 static void generic_plug_device(request_queue_t *q, kdev_t dev)
347 * no need to replug device
349 if (!list_empty(&q->queue_head) || q->plugged)
353 queue_task(&q->plug_tq, &tq_disk);
357 * remove the plug and let it rip..
359 static inline void __generic_unplug_device(request_queue_t *q)
363 if (!list_empty(&q->queue_head))
368 void generic_unplug_device(void *data)
370 request_queue_t *q = (request_queue_t *) data;
373 spin_lock_irqsave(&io_request_lock, flags);
374 __generic_unplug_device(q);
375 spin_unlock_irqrestore(&io_request_lock, flags);
/** blk_grow_request_list
 * @q: The &request_queue_t
 * @nr_requests: how many requests are desired
 *
 * More free requests are added to the queue's free lists, bringing
 * the total number of requests to @nr_requests.
 *
 * The requests are added equally to the request queue's read
 * and write freelists.
 *
 * This function can sleep.
 *
 * Returns the (new) number of requests which the queue has available.
 */
int blk_grow_request_list(request_queue_t *q, int nr_requests)
{
	/* Several broken drivers assume that this function doesn't sleep;
	 * this causes system hangs during boot.
	 * As a temporary fix, make the function non-blocking.
	 */
	spin_lock_irqsave(&io_request_lock, flags);
	while (q->nr_requests < nr_requests) {
		rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
		memset(rq, 0, sizeof(*rq));
		rq->rq_status = RQ_INACTIVE;
		rw = q->nr_requests & 1;
		list_add(&rq->queue, &q->rq[rw].free);
	}
	q->batch_requests = q->nr_requests / 4;
	if (q->batch_requests > 32)
		q->batch_requests = 32;
	spin_unlock_irqrestore(&io_request_lock, flags);
	return q->nr_requests;
}
static void blk_init_free_list(request_queue_t *q)
{
	int megs;		/* Total memory, in megabytes */

	INIT_LIST_HEAD(&q->rq[READ].free);
	INIT_LIST_HEAD(&q->rq[WRITE].free);
	q->rq[READ].count = 0;
	q->rq[WRITE].count = 0;

	megs = si.totalram >> (20 - PAGE_SHIFT);

	blk_grow_request_list(q, nr_requests);

	init_waitqueue_head(&q->wait_for_requests[0]);
	init_waitqueue_head(&q->wait_for_requests[1]);
	spin_lock_init(&q->queue_lock);
}

static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @q: The &request_queue_t to be initialised
 * @rfn: The function to be called to process requests that have been
 *       placed on the queue.
 *
 * If a block device wishes to use the standard request handling procedures,
 * which sorts requests and coalesces adjacent requests, then it must
 * call blk_init_queue().  The function @rfn will be called when there
 * are requests on the queue that need to be processed.  If the device
 * supports plugging, then @rfn may not be called immediately when requests
 * are available on the queue, but may be called at some time later instead.
 * Plugged queues are generally unplugged when a buffer belonging to one
 * of the requests on the queue is needed, or due to memory pressure.
 *
 * @rfn is not required, or even expected, to remove all requests off the
 * queue, but only as many as it can handle at a time.  If it does leave
 * requests on the queue, it is responsible for arranging that the requests
 * get dealt with eventually.
 *
 * A global spin lock, io_request_lock, must be held while manipulating the
 * requests on the request queue.
 *
 * The request on the head of the queue is by default assumed to be
 * potentially active, and it is not considered for re-ordering or merging
 * whenever the given queue is unplugged. This behaviour can be changed with
 * blk_queue_headactive().
 *
 * blk_init_queue() must be paired with a blk_cleanup_queue() call
 * when the block device is deactivated (such as at module unload).
 **/
void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
{
	INIT_LIST_HEAD(&q->queue_head);
	elevator_init(&q->elevator, ELEVATOR_LINUS);
	blk_init_free_list(q);

	q->back_merge_fn	= ll_back_merge_fn;
	q->front_merge_fn	= ll_front_merge_fn;
	q->merge_requests_fn	= ll_merge_requests_fn;
	q->make_request_fn	= __make_request;
	q->plug_tq.routine	= &generic_unplug_device;

	/*
	 * These booleans describe the queue properties.  We set the
	 * default (and most common) values here.  Other drivers can
	 * use the appropriate functions to alter the queue properties.
	 */
	q->plug_device_fn	= generic_plug_device;

	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}
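
/*
 * Illustrative sketch (not part of the original file): minimal setup of a
 * request-driven driver around blk_init_queue().  "mydev", its request
 * function and the immediate in-line completion are hypothetical; a real
 * driver would also register the major number, gendisk, blk_size[], etc.
 */
#if 0
static void mydev_request_fn(request_queue_t *q)
{
	struct request *req;

	while (!list_empty(&q->queue_head)) {
		req = blkdev_entry_next_request(&q->queue_head);
		blkdev_dequeue_request(req);	/* we process requests off-queue */

		/* ... the actual transfer described by req would go here ... */

		while (end_that_request_first(req, 1, "mydev"))
			;			/* complete every buffer_head */
		end_that_request_last(req);
	}
}

static int __init mydev_init(void)
{
	request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);

	blk_init_queue(q, mydev_request_fn);
	blk_queue_headactive(q, 0);	/* requests are dequeued before use */
	return 0;
}
#endif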
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);

/*
 * Get a free request. io_request_lock must be held and interrupts
 * disabled on the way in. Returns NULL if there are no free requests.
 */
static struct request *get_request(request_queue_t *q, int rw)
{
	struct request *rq = NULL;
	struct request_list *rl = q->rq + rw;

	if (!list_empty(&rl->free)) {
		rq = blkdev_free_rq(&rl->free);
		list_del(&rq->queue);
		rq->rq_status = RQ_ACTIVE;
/*
 * Here's the request allocation design:
 *
 * 1: Blocking on request exhaustion is a key part of I/O throttling.
 *
 * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
 *    attempt to ensure that all requesters sleep for a similar duration.  Hence
 *    no stealing requests when there are other processes waiting.
 *
 * 3: We also wish to support `batching' of requests.  So when a process is
 *    woken, we want to allow it to allocate a decent number of requests
 *    before it blocks again, so they can be nicely merged (this only really
 *    matters if the process happens to be adding requests near the head of
 *    the queue).
 *
 * 4: We want to avoid scheduling storms.  This isn't really important, because
 *    the system will be I/O bound anyway.  But it's easy.
 *
 * There is tension between requirements 2 and 3.  Once a task has woken,
 * we don't want to allow it to sleep as soon as it takes its second request.
 * But we don't want currently-running tasks to steal all the requests
 * from the sleepers.  We handle this with wakeup hysteresis around
 * 0 .. batch_requests and with the assumption that request taking is much,
 * much faster than request freeing.
 *
 * So here's what we do:
 *
 *  a) A READA requester fails if free_requests < batch_requests
 *
 *     We don't want READA requests to prevent sleepers from ever
 *     waking.  Note that READA is used extremely rarely - a few
 *     filesystems use it for directory readahead.
 *
 * When a process wants a new request:
 *
 *  b) If free_requests == 0, the requester sleeps in FIFO manner.
 *
 *  c) If 0 < free_requests < batch_requests and there are waiters,
 *     we still take a request non-blockingly.  This provides batching.
 *
 *  d) If free_requests >= batch_requests, the caller is immediately
 *     granted a new request.
 *
 * When a request is released:
 *
 *  e) If free_requests < batch_requests, do nothing.
 *
 *  f) If free_requests >= batch_requests, wake up a single waiter.
 *
 * The net effect is that when a process is woken at the batch_requests level,
 * it will be able to take approximately (batch_requests) requests before
 * blocking again (at the tail of the queue).
 *
 * This all assumes that the rate of taking requests is much, much higher
 * than the rate of releasing them.  Which is very true.
 */
static struct request *__get_request_wait(request_queue_t *q, int rw)
{
	register struct request *rq;
	DECLARE_WAITQUEUE(wait, current);

	generic_unplug_device(q);
	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (q->rq[rw].count == 0)
			schedule();
		spin_lock_irq(&io_request_lock);
		rq = get_request(q, rw);
		spin_unlock_irq(&io_request_lock);
	} while (rq == NULL);
	remove_wait_queue(&q->wait_for_requests[rw], &wait);
	current->state = TASK_RUNNING;
	return rq;
}
/* RO fail safe mechanism */

static long ro_bits[MAX_BLKDEV][8];

int is_read_only(kdev_t dev)
{
	int major = MAJOR(dev), minor = MINOR(dev);

	if (major < 0 || major >= MAX_BLKDEV) return 0;
	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
}

void set_device_ro(kdev_t dev, int flag)
{
	int major = MAJOR(dev), minor = MINOR(dev);

	if (major < 0 || major >= MAX_BLKDEV) return;
	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
}
inline void drive_stat_acct (kdev_t dev, int rw,
				unsigned long nr_sectors, int new_io)
{
	unsigned int major = MAJOR(dev);
	unsigned int index;

	index = disk_index(dev);
	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
		return;

	kstat.dk_drive[major][index] += new_io;
	if (rw == READ) {
		kstat.dk_drive_rio[major][index] += new_io;
		kstat.dk_drive_rblk[major][index] += nr_sectors;
	} else if (rw == WRITE) {
		kstat.dk_drive_wio[major][index] += new_io;
		kstat.dk_drive_wblk[major][index] += nr_sectors;
	} else
		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}
#ifdef CONFIG_BLK_STATS
/*
 * Return up to two hd_structs on which to do IO accounting for a given
 * request.
 *
 * On a partitioned device, we want to account both against the partition
 * and against the whole disk.
 */
static void locate_hd_struct(struct request *req,
			     struct hd_struct **hd1,
			     struct hd_struct **hd2)
{
	gd = get_gendisk(req->rq_dev);
	if (gd && gd->part) {
		/* Mask out the partition bits: account for the entire disk */
		int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
		int whole_minor = devnr << gd->minor_shift;

		*hd1 = &gd->part[whole_minor];
		if (whole_minor != MINOR(req->rq_dev))
			*hd2 = &gd->part[MINOR(req->rq_dev)];
	}
}
/*
 * Round off the performance stats on an hd_struct.
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation.  To deal with that, we do a
 * round-off before returning the results when reading /proc/partitions,
 * accounting immediately for all queue usage up to the current jiffies and
 * restarting the counters again.
 */
void disk_round_stats(struct hd_struct *hd)
{
	unsigned long now = jiffies;

	hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
	hd->last_queue_change = now;

	if (hd->ios_in_flight)
		hd->io_ticks += (now - hd->last_idle_time);
	hd->last_idle_time = now;
}
static inline void down_ios(struct hd_struct *hd)
{
	disk_round_stats(hd);
	--hd->ios_in_flight;
}

static inline void up_ios(struct hd_struct *hd)
{
	disk_round_stats(hd);
	++hd->ios_in_flight;
}
static void account_io_start(struct hd_struct *hd, struct request *req,
			     int merge, int sectors)
{
	hd->rd_sectors += sectors;
	hd->wr_sectors += sectors;

static void account_io_end(struct hd_struct *hd, struct request *req)
{
	unsigned long duration = jiffies - req->start_time;

	hd->rd_ticks += duration;
	hd->wr_ticks += duration;
void req_new_io(struct request *req, int merge, int sectors)
{
	struct hd_struct *hd1, *hd2;

	locate_hd_struct(req, &hd1, &hd2);
	if (hd1)
		account_io_start(hd1, req, merge, sectors);
	if (hd2)
		account_io_start(hd2, req, merge, sectors);
}

void req_merged_io(struct request *req)
{
	struct hd_struct *hd1, *hd2;

	locate_hd_struct(req, &hd1, &hd2);

void req_finished_io(struct request *req)
{
	struct hd_struct *hd1, *hd2;

	locate_hd_struct(req, &hd1, &hd2);
	if (hd1)
		account_io_end(hd1, req);
	if (hd2)
		account_io_end(hd2, req);
}

EXPORT_SYMBOL(req_finished_io);
#endif /* CONFIG_BLK_STATS */
/*
 * add-request adds a request to the linked list.
 * io_request_lock is held and interrupts disabled, as we muck with the
 * request queue list.
 *
 * By this point, req->cmd is always either READ/WRITE, never READA,
 * which is important for drive_stat_acct() above.
 */
static inline void add_request(request_queue_t * q, struct request * req,
			       struct list_head *insert_here)
{
	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);

	if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
		spin_unlock_irq(&io_request_lock);
		BUG();
	}

	/*
	 * elevator indicated where it wants this request to be
	 * inserted at elevator_merge time
	 */
	list_add(&req->queue, insert_here);
}
/*
 * Must be called with io_request_lock held and interrupts disabled
 */
void blkdev_release_request(struct request *req)
{
	request_queue_t *q = req->q;
	int rw = req->cmd;

	req->rq_status = RQ_INACTIVE;

	/*
	 * Request may not have originated from ll_rw_blk. If not,
	 * assume it has free buffers and check waiters
	 */
	if (q) {
		list_add(&req->queue, &q->rq[rw].free);
		if (++q->rq[rw].count >= q->batch_requests &&
		    waitqueue_active(&q->wait_for_requests[rw]))
			wake_up(&q->wait_for_requests[rw]);
	}
}
/*
 * Has to be called with the request spinlock acquired
 */
static void attempt_merge(request_queue_t * q,
			  struct request *req,
			  int max_sectors,
			  int max_segments)
{
	struct request *next;

	next = blkdev_next_request(req);
	if (req->sector + req->nr_sectors != next->sector)
		return;
	if (req->cmd != next->cmd
	    || req->rq_dev != next->rq_dev
	    || req->nr_sectors + next->nr_sectors > max_sectors
	    || next->waiting)
		return;
	/*
	 * If we are not allowed to merge these requests, then
	 * return.  If we are allowed to merge, then the count
	 * will have been updated to the appropriate number,
	 * and we shouldn't do it here too.
	 */
	if (!q->merge_requests_fn(q, req, next, max_segments))
		return;

	q->elevator.elevator_merge_req_fn(req, next);
	req->bhtail->b_reqnext = next->bh;
	req->bhtail = next->bhtail;
	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
	list_del(&next->queue);

	/* One last thing: we have removed a request, so we now have one
	   less expected IO to complete for accounting purposes. */
	blkdev_release_request(next);
}

static inline void attempt_back_merge(request_queue_t * q,
				      struct request *req,
				      int max_sectors,
				      int max_segments)
{
	if (&req->queue == q->queue_head.prev)
		return;
	attempt_merge(q, req, max_sectors, max_segments);
}

static inline void attempt_front_merge(request_queue_t * q,
				       struct list_head * head,
				       struct request *req,
				       int max_sectors,
				       int max_segments)
{
	struct list_head * prev;

	prev = req->queue.prev;
	if (prev == head)
		return;
	attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
}
static int __make_request(request_queue_t * q, int rw,
			  struct buffer_head * bh)
{
	unsigned int sector, count;
	int max_segments = MAX_SEGMENTS;
	struct request * req, *freereq = NULL;
	int rw_ahead, max_sectors, el_ret;
	struct list_head *head, *insert_here;
	elevator_t *elevator = &q->elevator;

	count = bh->b_size >> 9;
	sector = bh->b_rsector;

	rw_ahead = 0;	/* normal case; gets changed below for READA */
#if 0	/* bread() misinterprets failed READA attempts as IO errors on SMP */
	rw = READ;	/* drop into READ */
	latency = elevator_request_latency(elevator, rw);

	/* We'd better have a real physical mapping!
	   Check this bit only if the buffer was dirty and just locked
	   down by us so at this point flushpage will block and
	   won't clear the mapped bit under us. */
	if (!buffer_mapped(bh))

	/*
	 * Temporary solution - in 2.5 this will be done by the lowlevel
	 * driver. Create a bounce buffer if the buffer data points into
	 * high memory - keep the original buffer otherwise.
	 */
	bh = blk_queue_bounce(q, rw, bh);
	/* look for a free request. */
	/*
	 * Try to coalesce the new request with old requests
	 */
	max_sectors = get_max_sectors(bh->b_rdev);

	head = &q->queue_head;
	/*
	 * Now we acquire the request spinlock, we have to be mega careful
	 * not to schedule or do something nonatomic
	 */
	spin_lock_irq(&io_request_lock);

	insert_here = head->prev;
	if (list_empty(head)) {
		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
	} else if (q->head_active && !q->plugged)

	el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw, max_sectors);
	case ELEVATOR_BACK_MERGE:
		if (!q->back_merge_fn(q, req, bh, max_segments)) {
			insert_here = &req->queue;
		req->bhtail->b_reqnext = bh;
		req->nr_sectors = req->hard_nr_sectors += count;
		blk_started_io(count);
		drive_stat_acct(req->rq_dev, req->cmd, count, 0);
		req_new_io(req, 1, count);
		attempt_back_merge(q, req, max_sectors, max_segments);

	case ELEVATOR_FRONT_MERGE:
		if (!q->front_merge_fn(q, req, bh, max_segments)) {
			insert_here = req->queue.prev;
		bh->b_reqnext = req->bh;
		/*
		 * may not be valid, but queues not having bounce
		 * enabled for highmem pages must not look at it anyway
		 */
		req->buffer = bh->b_data;
		req->current_nr_sectors = req->hard_cur_sectors = count;
		req->sector = req->hard_sector = sector;
		req->nr_sectors = req->hard_nr_sectors += count;
		blk_started_io(count);
		drive_stat_acct(req->rq_dev, req->cmd, count, 0);
		req_new_io(req, 1, count);
		attempt_front_merge(q, head, req, max_sectors, max_segments);

	/*
	 * elevator says don't/can't merge. get new request
	 */
	case ELEVATOR_NO_MERGE:
		/*
		 * use elevator hints as to where to insert the
		 * request. if no hints, just add it to the back
		 * of the queue
		 */
		insert_here = &req->queue;

	printk("elevator returned crap (%d)\n", el_ret);
	/*
	 * See description above __get_request_wait()
	 */
	if (q->rq[rw].count < q->batch_requests) {
		spin_unlock_irq(&io_request_lock);
		req = get_request(q, rw);
		req = get_request(q, rw);
		spin_unlock_irq(&io_request_lock);
		freereq = __get_request_wait(q, rw);

	/* fill up the request-info, and add it to the queue */
	req->elevator_sequence = latency;
	req->hard_sector = req->sector = sector;
	req->hard_nr_sectors = req->nr_sectors = count;
	req->current_nr_sectors = req->hard_cur_sectors = count;
	req->nr_segments = 1;	/* Always 1 for a new request. */
	req->nr_hw_segments = 1;	/* Always 1 for a new request. */
	req->buffer = bh->b_data;
	req->waiting = NULL;
	req->rq_dev = bh->b_rdev;
	req->start_time = jiffies;
	req_new_io(req, 0, count);
	blk_started_io(count);
	add_request(q, req, insert_here);
	blkdev_release_request(freereq);
	spin_unlock_irq(&io_request_lock);
	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
/**
 * generic_make_request: hand a buffer head to its device driver for I/O
 * @rw: READ, WRITE, or READA - what sort of I/O is desired.
 * @bh: The buffer head describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct buffer_head and a &rw value.  The
 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
 * %READA value means that a read is required, but that the driver is
 * free to fail the request if, for example, it cannot get needed
 * resources immediately.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bh->b_end_io
 * function described (one day) elsewhere.
 *
 * The caller of generic_make_request must make sure that b_page,
 * b_data, b_size are set to describe the memory buffer, that b_rdev
 * and b_rsector are set to describe the device address, and the
 * b_end_io and optionally b_private are set to describe how
 * completion notification should be signaled.  BH_Mapped should also
 * be set (to confirm that b_dev and b_blocknr are valid).
 *
 * generic_make_request and the drivers it calls may use b_reqnext,
 * and may change b_rdev and b_rsector.  So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 * Because of this, the caller should record the device address
 * information in b_dev and b_blocknr.
 *
 * Apart from those fields mentioned above, no other fields, and in
 * particular, no other flags, are changed by generic_make_request or
 * any lower level drivers.
 **/
void generic_make_request (int rw, struct buffer_head * bh)
{
	int major = MAJOR(bh->b_rdev);
	int minorsize = 0;
	request_queue_t *q;

	/* Test device size, when known. */
	if (blk_size[major])
		minorsize = blk_size[major][MINOR(bh->b_rdev)];

	unsigned long maxsector = (minorsize << 1) + 1;
	unsigned long sector = bh->b_rsector;
	unsigned int count = bh->b_size >> 9;

	if (maxsector < count || maxsector - count < sector) {
		bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);

		/* This may well happen - the kernel calls bread()
		   without checking the size of the device, e.g.,
		   when mounting a device. */
		printk(KERN_INFO
		       "attempt to access beyond end of device\n");
		printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
		       kdevname(bh->b_rdev), rw,
		       (sector + count)>>1, minorsize);

		bh->b_end_io(bh, 0);
	}

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 */
	/* NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
	do {
		q = blk_get_queue(bh->b_rdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
			       "nonexistent block-device %s (%ld)\n",
			       kdevname(bh->b_rdev), bh->b_rsector);
			buffer_IO_error(bh);
			break;
		}
	} while (q->make_request_fn(q, rw, bh));
}
/**
 * submit_bh: submit a buffer_head to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bh: The &struct buffer_head which describes the I/O
 *
 * submit_bh() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work.
 *
 * The extra functionality provided by submit_bh is to determine
 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
 * This is appropriate for IO requests that come from the buffer
 * cache and page cache which (currently) always use aligned blocks.
 **/
void submit_bh(int rw, struct buffer_head * bh)
{
	int count = bh->b_size >> 9;

	if (!test_bit(BH_Lock, &bh->b_state))
		BUG();

	set_bit(BH_Req, &bh->b_state);
	set_bit(BH_Launder, &bh->b_state);

	/*
	 * First step, 'identity mapping' - RAID or LVM might
	 * further remap this.
	 */
	bh->b_rdev = bh->b_dev;
	bh->b_rsector = bh->b_blocknr * count;

	generic_make_request(rw, bh);

	switch (rw) {
	case WRITE:
		kstat.pgpgout += count;
		break;
	default:
		kstat.pgpgin += count;
		break;
	}
}
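
/*
 * Illustrative sketch (not part of the original file): submitting one
 * locked, mapped buffer_head with a private completion routine instead of
 * going through ll_rw_block().  The handler and helper names are
 * hypothetical.
 */
#if 0
static void example_end_io(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
}

static void example_read_bh(struct buffer_head *bh)
{
	/* the caller has already filled in b_dev, b_blocknr, b_size,
	 * b_data/b_page and set BH_Mapped */
	lock_buffer(bh);
	bh->b_end_io = example_end_io;
	submit_bh(READ, bh);
}
#endif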
/**
 * ll_rw_block: low-level access to block devices
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
 * and requests an I/O operation on them, either a %READ or a %WRITE.
 * The third %READA option is described in the documentation for
 * generic_make_request() which ll_rw_block() calls.
 *
 * This function provides extra functionality that is not in
 * generic_make_request() that is relevant to buffers in the buffer
 * cache or page cache.  In particular it drops any buffer that it
 * cannot get a lock on (with the BH_Lock state bit), any buffer that
 * appears to be clean when doing a write request, and any buffer that
 * appears to be up-to-date when doing a read request.  Further it marks
 * as clean buffers that are processed for writing (the buffer cache
 * won't assume that they are actually clean until the buffer gets
 * unlocked).
 *
 * ll_rw_block sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.  A client that needs a more interesting completion
 * routine should call submit_bh() (or generic_make_request())
 * directly.
 *
 * All of the buffers must be for the same device, and must also be
 * of the current approved size for the device. */
void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
{
	major = MAJOR(bhs[0]->b_dev);

	/* Determine correct block size for this device. */
	correct_size = get_hardsect_size(bhs[0]->b_dev);

	/* Verify requested block sizes. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];
		if (bh->b_size % correct_size) {
			printk(KERN_NOTICE "ll_rw_block: device %s: "
			       "only %d-char blocks implemented (%u)\n",
			       kdevname(bhs[0]->b_dev),
			       correct_size, bh->b_size);

	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
		printk(KERN_NOTICE "Can't write to read-only device %s\n",
		       kdevname(bhs[0]->b_dev));

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh = bhs[i];

		/* Only one thread can actually submit the I/O. */
		if (test_and_set_bit(BH_Lock, &bh->b_state))

		/* We have the buffer lock */
		atomic_inc(&bh->b_count);
		bh->b_end_io = end_buffer_io_sync;

		if (!atomic_set_buffer_clean(bh))
			/* Hmmph! Nothing to write */
		__mark_buffer_clean(bh);

		if (buffer_uptodate(bh))
			/* Hmmph! Already have it */

		bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));

	/* Make sure we don't get infinite dirty retries.. */
	for (i = 0; i < nr; i++)
		mark_buffer_clean(bhs[i]);
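
/*
 * Illustrative sketch (not part of the original file): the classic
 * synchronous read pattern built on ll_rw_block(), as filesystems get via
 * bread().  "dev", "block" and "blocksize" are placeholders.
 */
#if 0
	struct buffer_head *bh = getblk(dev, block, blocksize);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);	/* sleep until b_end_io unlocks it */
	}
#endif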
#ifdef CONFIG_STRAM_SWAP
extern int stram_device_init (void);
#endif
/**
 * end_that_request_first - end I/O on one buffer.
 * @req: the request being processed
 * @uptodate: 0 for I/O error
 * @name: the name printed for an I/O error
 *
 * Ends I/O on the first buffer attached to @req, and sets it up
 * for the next buffer_head (if any) in the cluster.
 *
 * 0 - we are done with this request, call end_that_request_last()
 * 1 - still buffers pending for this request
 *
 * Drivers implementing their own end_request handling must call
 * blk_finished_io() appropriately.
 **/
int end_that_request_first (struct request *req, int uptodate, char *name)
{
	struct buffer_head * bh;
	int nsect;

	if (!uptodate)
		printk("end_request: I/O error, dev %s (%s), sector %lu\n",
			kdevname(req->rq_dev), name, req->sector);

	if ((bh = req->bh) != NULL) {
		nsect = bh->b_size >> 9;
		blk_finished_io(nsect);
		req->bh = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, uptodate);
		if ((bh = req->bh) != NULL) {
			req->hard_sector += nsect;
			req->hard_nr_sectors -= nsect;
			req->sector = req->hard_sector;
			req->nr_sectors = req->hard_nr_sectors;

			req->current_nr_sectors = bh->b_size >> 9;
			req->hard_cur_sectors = req->current_nr_sectors;
			if (req->nr_sectors < req->current_nr_sectors) {
				req->nr_sectors = req->current_nr_sectors;
				printk("end_request: buffer-list destroyed\n");
			}
			req->buffer = bh->b_data;
			return 1;
		}
	}
	return 0;
}
void end_that_request_last(struct request *req)
{
	if (req->waiting != NULL)
		complete(req->waiting);
	req_finished_io(req);
	blkdev_release_request(req);
}
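
/*
 * Illustrative sketch (not part of the original file): the usual completion
 * pattern in a driver's interrupt handler, finishing the current request
 * one buffer_head at a time.  "mydev" and the way the request is located
 * are assumptions made for the example.
 */
#if 0
	struct request *req = blkdev_entry_next_request(&q->queue_head);
	int uptodate = 1;			/* 0 on I/O error */

	if (!end_that_request_first(req, uptodate, "mydev")) {
		/* all buffer_heads done: take it off the queue and free it */
		blkdev_dequeue_request(req);
		end_that_request_last(req);
	}
#endif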
int __init blk_dev_init(void)
{
	struct blk_dev_struct *dev;

	request_cachep = kmem_cache_create("blkdev_requests",
					   sizeof(struct request),
					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!request_cachep)
		panic("Can't create request pool slab cache\n");

	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
		dev->queue = NULL;

	memset(ro_bits, 0, sizeof(ro_bits));
	memset(max_readahead, 0, sizeof(max_readahead));
	memset(max_sectors, 0, sizeof(max_sectors));

	blk_max_low_pfn = max_low_pfn - 1;
	blk_max_pfn = max_pfn - 1;
#ifdef CONFIG_AMIGA_Z2RAM
#ifdef CONFIG_STRAM_SWAP
	stram_device_init();
#ifdef CONFIG_ISP16_CDI
#ifdef CONFIG_BLK_DEV_PS2
#ifdef CONFIG_BLK_DEV_XD
#ifdef CONFIG_BLK_DEV_MFM
#ifdef CONFIG_PARIDE
	{ extern void paride_init(void); paride_init(); };
#ifdef CONFIG_MAC_FLOPPY
#ifdef CONFIG_BLK_DEV_SWIM_IOP
#ifdef CONFIG_AMIGA_FLOPPY
	amiga_floppy_init();
#ifdef CONFIG_ATARI_FLOPPY
	atari_floppy_init();
#ifdef CONFIG_BLK_DEV_FD
#if defined(__i386__)	/* Do we even need this? */
#ifdef CONFIG_CDU31A
#ifdef CONFIG_ATARI_ACSI
#ifdef CONFIG_CDU535
#ifdef CONFIG_APBLOCK
#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
#ifdef CONFIG_BLK_DEV_XPRAM
#ifdef CONFIG_SUN_JSFLASH
EXPORT_SYMBOL(io_request_lock);
EXPORT_SYMBOL(end_that_request_first);
EXPORT_SYMBOL(end_that_request_last);
EXPORT_SYMBOL(blk_grow_request_list);
EXPORT_SYMBOL(blk_init_queue);
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_queue_headactive);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
EXPORT_SYMBOL(blkdev_release_request);
EXPORT_SYMBOL(generic_unplug_device);
EXPORT_SYMBOL(blk_queue_bounce_limit);
EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);
EXPORT_SYMBOL(blk_seg_merge_ok);
EXPORT_SYMBOL(blk_nohighio);