drivers/scsi/scsi_merge.c

   1 /*
   2  *  scsi_merge.c Copyright (C) 1999 Eric Youngdale
   3  *
   4  *  SCSI queueing library.
   5  *      Initial versions: Eric Youngdale (eric@andante.org).
   6  *                        Based upon conversations with large numbers
   7  *                        of people at Linux Expo.
   8  *      Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com).
   9  *      Support for highmem I/O: Jens Axboe <axboe@suse.de>
  10  */
  11
  12 /*
  13  * This file contains queue management functions that are used by SCSI.
  14  * Typically this is used for several purposes.   First, we need to ensure
  15  * that commands do not grow so large that they cannot be handled all at
  16  * once by a host adapter.   The various flavors of merge functions included
  17  * here serve this purpose.
  18  *
  19  * Note that it would be quite trivial to allow the low-level driver the
   20  * flexibility to define its own queue handling functions.  For the time
  21  * being, the hooks are not present.   Right now we are just using the
  22  * data in the host template as an indicator of how we should be handling
  23  * queues, and we select routines that are optimized for that purpose.
  24  *
  25  * Some hosts do not impose any restrictions on the size of a request.
  26  * In such cases none of the merge functions in this file are called,
  27  * and we allow ll_rw_blk to merge requests in the default manner.
  28  * This isn't guaranteed to be optimal, but it should be pretty darned
  29  * good.   If someone comes up with ideas of better ways of managing queues
  30  * to improve on the default behavior, then certainly fit it into this
  31  * scheme in whatever manner makes the most sense.   Please note that
   32  * since each device has its own queue, we have considerable flexibility
  33  * in queue management.
  34  */
  35
  36 #define __NO_VERSION__
  37 #include <linux/config.h>
  38 #include <linux/module.h>
  39
  40 #include <linux/sched.h>
  41 #include <linux/timer.h>
  42 #include <linux/string.h>
  43 #include <linux/slab.h>
  44 #include <linux/ioport.h>
  45 #include <linux/kernel.h>
  46 #include <linux/stat.h>
  47 #include <linux/blk.h>
  48 #include <linux/interrupt.h>
  49 #include <linux/delay.h>
  50 #include <linux/smp_lock.h>
  51
  52 #define __KERNEL_SYSCALLS__
  53
  54 #include <linux/unistd.h>
  55
  56 #include <asm/system.h>
  57 #include <asm/irq.h>
  58 #include <asm/dma.h>
  59 #include <asm/io.h>
  60
  61 #include "scsi.h"
  62 #include "hosts.h"
  63 #include "constants.h"
  64 #include <scsi/scsi_ioctl.h>
  65
  66 /*
  67  * scsi_malloc() can only dish out items of PAGE_SIZE or less, so we cannot
  68  * build a request that requires an sg table allocation of more than that.
  69  */
  70 static const int scsi_max_sg = PAGE_SIZE / sizeof(struct scatterlist);
  71
/*
 * Because scsi_malloc() cannot hand out more than PAGE_SIZE at a time,
 * bounce buffers cannot be allocated in chunks > PAGE_SIZE either.
 * Ultimately we should get away from using a dedicated DMA bounce buffer
 * pool and use kmalloc() instead.  If we can eliminate this pool, then
 * this restriction would no longer be needed.
 *
 * While this is defined, the segment counting and merge code in this file
 * starts a new segment instead of letting a segment that sits above
 * ISA_DMA_THRESHOLD on an ISA DMA host grow beyond PAGE_SIZE.
 */
#define DMA_SEGMENT_SIZE_LIMITED
  79
  80 #ifdef CONFIG_SCSI_DEBUG_QUEUES
  81 /*
  82  * Enable a bunch of additional consistency checking.   Turn this off
  83  * if you are benchmarking.
  84  */
  85 static int dump_stats(struct request *req,
  86                       int use_clustering,
  87                       int dma_host,
  88                       int segments)
  89 {
  90         struct buffer_head *bh;
  91
  92         /*
  93          * Dump the information that we have.  We know we have an
  94          * inconsistency.
  95          */
  96         printk("nr_segments is %x\n", req->nr_segments);
  97         printk("counted segments is %x\n", segments);
  98         printk("Flags %d %d\n", use_clustering, dma_host);
  99         for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext)
 100         {
 101                 printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
 102                        bh,
 103                        bh->b_size >> 9,
 104                        bh_phys(bh) - 1);
 105         }
 106         panic("Ththththaats all folks.  Too dangerous to continue.\n");
 107 }
 108
 109
 110 /*
 111  * Simple sanity check that we will use for the first go around
 112  * in order to ensure that we are doing the counting correctly.
 113  * This can be removed for optimization.
 114  */
 115 #define SANITY_CHECK(req, _CLUSTER, _DMA)                               \
 116     if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) )       \
 117     {                                                                   \
 118         printk("Incorrect segment count at 0x%p", current_text_addr()); \
 119         dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \
 120     }
 121 #else
 122 #define SANITY_CHECK(req, _CLUSTER, _DMA)
 123 #endif
 124
 125 static void dma_exhausted(Scsi_Cmnd * SCpnt, int i)
 126 {
 127         int jj;
 128         struct scatterlist *sgpnt;
 129         void **bbpnt;
 130         int consumed = 0;
 131
 132         sgpnt = (struct scatterlist *) SCpnt->request_buffer;
 133         bbpnt = SCpnt->bounce_buffers;
 134
 135         /*
 136          * Now print out a bunch of stats.  First, start with the request
 137          * size.
 138          */
 139         printk("dma_free_sectors:%d\n", scsi_dma_free_sectors);
 140         printk("use_sg:%d\ti:%d\n", SCpnt->use_sg, i);
 141         printk("request_bufflen:%d\n", SCpnt->request_bufflen);
 142         /*
 143          * Now dump the scatter-gather table, up to the point of failure.
 144          */
 145         for(jj=0; jj < SCpnt->use_sg; jj++)
 146         {
 147                 printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n",
 148                        jj,
 149                        sgpnt[jj].length,
 150                        sgpnt[jj].address,
 151                        (bbpnt ? bbpnt[jj] : NULL));
 152                 if (bbpnt && bbpnt[jj])
 153                         consumed += sgpnt[jj].length;
 154         }
 155         printk("Total %d sectors consumed\n", consumed);
 156         panic("DMA pool exhausted");
 157 }
 158
 159 #define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering)
 160
 161 /*
 162  * This entire source file deals with the new queueing code.
 163  */
 164
 165 /*
 166  * Function:    __count_segments()
 167  *
 168  * Purpose:     Prototype for queue merge function.
 169  *
 170  * Arguments:   q       - Queue for which we are merging request.
 171  *              req     - request into which we wish to merge.
 172  *              use_clustering - 1 if this host wishes to use clustering
 173  *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
 174  *                      expose all of the address lines, so that DMA cannot
 175  *                      be done from an arbitrary address).
 176  *              remainder - used to track the residual size of the last
 177  *                      segment.  Comes in handy when we want to limit the
 178  *                      size of bounce buffer segments to PAGE_SIZE.
 179  *
 180  * Returns:     Count of the number of SG segments for the request.
 181  *
 182  * Lock status:
 183  *
 184  * Notes:       This is only used for diagnostic purposes.
 185  */
 186 __inline static int __count_segments(struct request *req,
 187                                      int use_clustering,
 188                                      int dma_host,
 189                                      int * remainder)
 190 {
 191         int ret = 1;
 192         int reqsize = 0;
 193         struct buffer_head *bh;
 194         struct buffer_head *bhnext;
 195
 196         if( remainder != NULL ) {
 197                 reqsize = *remainder;
 198         }
 199
 200         /*
 201          * Add in the size increment for the first buffer.
 202          */
 203         bh = req->bh;
 204 #ifdef DMA_SEGMENT_SIZE_LIMITED
 205         if( reqsize + bh->b_size > PAGE_SIZE ) {
 206                 ret++;
 207                 reqsize = bh->b_size;
 208         } else {
 209                 reqsize += bh->b_size;
 210         }
 211 #else
 212         reqsize += bh->b_size;
 213 #endif
 214
 215         for (bh = req->bh, bhnext = bh->b_reqnext;
 216              bhnext != NULL;
 217              bh = bhnext, bhnext = bh->b_reqnext) {
 218                 if (use_clustering) {
 219                         /*
 220                          * See if we can do this without creating another
 221                          * scatter-gather segment.  In the event that this is a
 222                          * DMA capable host, make sure that a segment doesn't span
 223                          * the DMA threshold boundary.
 224                          */
 225                         if (dma_host && bh_phys(bhnext) - 1 == ISA_DMA_THRESHOLD) {
 226                                 ret++;
 227                                 reqsize = bhnext->b_size;
 228                         } else if (blk_seg_merge_ok(bh, bhnext)) {
 229                                 /*
 230                                  * This one is OK.  Let it go.
 231                                  */
 232 #ifdef DMA_SEGMENT_SIZE_LIMITED
 233                                 /* Note scsi_malloc is only able to hand out
 234                                  * chunks of memory in sizes of PAGE_SIZE or
 235                                  * less.  Thus we need to keep track of
 236                                  * the size of the piece that we have
 237                                  * seen so far, and if we have hit
 238                                  * the limit of PAGE_SIZE, then we are
 239                                  * kind of screwed and we need to start
 240                                  * another segment.
 241                                  */
 242                                 if( dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD
 243                                     && reqsize + bhnext->b_size > PAGE_SIZE )
 244                                 {
 245                                         ret++;
 246                                         reqsize = bhnext->b_size;
 247                                         continue;
 248                                 }
 249 #endif
 250                                 reqsize += bhnext->b_size;
 251                                 continue;
 252                         }
 253                         ret++;
 254                         reqsize = bhnext->b_size;
 255                 } else {
 256                         ret++;
 257                         reqsize = bhnext->b_size;
 258                 }
 259         }
 260         if( remainder != NULL ) {
 261                 *remainder = reqsize;
 262         }
 263         return ret;
 264 }
 265
 266 /*
 267  * Function:    recount_segments()
 268  *
 269  * Purpose:     Recount the number of scatter-gather segments for this request.
 270  *
 271  * Arguments:   req     - request that needs recounting.
 272  *
 273  * Returns:     Count of the number of SG segments for the request.
 274  *
 275  * Lock status: Irrelevant.
 276  *
 277  * Notes:       This is only used when we have partially completed requests
 278  *              and the bit that is leftover is of an indeterminate size.
 279  *              This can come up if you get a MEDIUM_ERROR, for example,
 280  *              as we will have "completed" all of the sectors up to and
 281  *              including the bad sector, and the leftover bit is what
 282  *              we have to do now.  This tends to be a rare occurrence, so
 283  *              we aren't busting our butts to instantiate separate versions
 284  *              of this function for the 4 different flag values.  We
 285  *              probably should, however.
 286  */
 287 void
 288 recount_segments(Scsi_Cmnd * SCpnt)
 289 {
 290         struct request *req;
 291         struct Scsi_Host *SHpnt;
 292         Scsi_Device * SDpnt;
 293
 294         req   = &SCpnt->request;
 295         SHpnt = SCpnt->host;
 296         SDpnt = SCpnt->device;
 297
 298         req->nr_segments = __count_segments(req,
 299                                             CLUSTERABLE_DEVICE(SHpnt, SDpnt),
 300                                             SHpnt->unchecked_isa_dma, NULL);
 301 }
 302
 303 #define MERGEABLE_BUFFERS(X,Y) \
 304 (((((long)bh_phys((X))+(X)->b_size)|((long)bh_phys((Y)))) & \
 305   (DMA_CHUNK_SIZE - 1)) == 0)
 306
 307 #ifdef DMA_CHUNK_SIZE
 308 static inline int scsi_new_mergeable(request_queue_t * q,
 309                                      struct request * req,
 310                                      struct Scsi_Host *SHpnt,
 311                                      int max_segments)
 312 {
 313         /*
 314          * pci_map_sg will be able to merge these two
 315          * into a single hardware sg entry, check if
 316          * we'll have enough memory for the sg list.
 317          * scsi.c allocates for this purpose
 318          * min(64,sg_tablesize) entries.
 319          */
 320         if (req->nr_segments >= max_segments ||
 321             req->nr_segments >= SHpnt->sg_tablesize)
 322                 return 0;
 323         req->nr_segments++;
 324         return 1;
 325 }
 326
 327 static inline int scsi_new_segment(request_queue_t * q,
 328                                    struct request * req,
 329                                    struct Scsi_Host *SHpnt,
 330                                    int max_segments)
 331 {
 332         /*
 333          * pci_map_sg won't be able to map these two
 334          * into a single hardware sg entry, so we have to
 335          * check if things fit into sg_tablesize.
 336          */
 337         if (req->nr_hw_segments >= SHpnt->sg_tablesize ||
 338              req->nr_segments >= SHpnt->sg_tablesize)
 339                 return 0;
 340         req->nr_hw_segments++;
 341         req->nr_segments++;
 342         return 1;
 343 }
 344 #else
 345 static inline int scsi_new_segment(request_queue_t * q,
 346                                    struct request * req,
 347                                    struct Scsi_Host *SHpnt,
 348                                    int max_segments)
 349 {
 350         if (req->nr_segments < SHpnt->sg_tablesize &&
 351             req->nr_segments < max_segments) {
 352                 /*
 353                  * This will form the start of a new segment.  Bump the
 354                  * counter.
 355                  */
 356                 req->nr_segments++;
 357                 return 1;
 358         } else {
 359                 return 0;
 360         }
 361 }
 362 #endif
 363
 364 /*
 365  * Function:    __scsi_merge_fn()
 366  *
 367  * Purpose:     Prototype for queue merge function.
 368  *
 369  * Arguments:   q       - Queue for which we are merging request.
 370  *              req     - request into which we wish to merge.
 371  *              bh      - Block which we may wish to merge into request
 372  *              use_clustering - 1 if this host wishes to use clustering
 373  *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
 374  *                      expose all of the address lines, so that DMA cannot
 375  *                      be done from an arbitrary address).
 376  *
 377  * Returns:     1 if it is OK to merge the block into the request.  0
 378  *              if it is not OK.
 379  *
 380  * Lock status: io_request_lock is assumed to be held here.
 381  *
 382  * Notes:       Some drivers have limited scatter-gather table sizes, and
 383  *              thus they cannot queue an infinitely large command.  This
 384  *              function is called from ll_rw_blk before it attempts to merge
 385  *              a new block into a request to make sure that the request will
 386  *              not become too large.
 387  *
 388  *              This function is not designed to be directly called.  Instead
 389  *              it should be referenced from other functions where the
 390  *              use_clustering and dma_host parameters should be integer
 391  *              constants.  The compiler should thus be able to properly
 392  *              optimize the code, eliminating stuff that is irrelevant.
 393  *              It is more maintainable to do this way with a single function
 394  *              than to have 4 separate functions all doing roughly the
 395  *              same thing.
 396  */
 397 __inline static int __scsi_back_merge_fn(request_queue_t * q,
 398                                          struct request *req,
 399                                          struct buffer_head *bh,
 400                                          int max_segments,
 401                                          int use_clustering,
 402                                          int dma_host)
 403 {
 404         unsigned int count;
 405         unsigned int segment_size = 0;
 406         Scsi_Device *SDpnt = q->queuedata;
 407         struct Scsi_Host *SHpnt = SDpnt->host;
 408
 409         if (max_segments > scsi_max_sg)
 410                 max_segments = scsi_max_sg;
 411
 412 #ifdef DMA_CHUNK_SIZE
 413         if (max_segments > 64)
 414                 max_segments = 64;
 415 #endif
 416
 417         if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
 418                 return 0;
 419
 420         if (!BH_PHYS_4G(req->bhtail, bh))
 421                 goto new_end_segment;
 422
 423         if (use_clustering) {
 424                 /*
 425                  * See if we can do this without creating another
 426                  * scatter-gather segment.  In the event that this is a
 427                  * DMA capable host, make sure that a segment doesn't span
 428                  * the DMA threshold boundary.
 429                  */
 430                 if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD)
 431                         goto new_end_segment;
 432                 if (BH_CONTIG(req->bhtail, bh)) {
 433 #ifdef DMA_SEGMENT_SIZE_LIMITED
 434                         if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) {
 435                                 segment_size = 0;
 436                                 count = __count_segments(req, use_clustering, dma_host, &segment_size);
 437                                 if( segment_size + bh->b_size > PAGE_SIZE ) {
 438                                         goto new_end_segment;
 439                                 }
 440                         }
 441 #endif
 442                         /*
 443                          * This one is OK.  Let it go.
 444                          */
 445                         return 1;
 446                 }
 447         }
 448  new_end_segment:
 449 #ifdef DMA_CHUNK_SIZE
 450         if (MERGEABLE_BUFFERS(req->bhtail, bh))
 451                 return scsi_new_mergeable(q, req, SHpnt, max_segments);
 452 #endif
 453         return scsi_new_segment(q, req, SHpnt, max_segments);
 454 }
 455
 456 __inline static int __scsi_front_merge_fn(request_queue_t * q,
 457                                           struct request *req,
 458                                           struct buffer_head *bh,
 459                                           int max_segments,
 460                                           int use_clustering,
 461                                           int dma_host)
 462 {
 463         unsigned int count;
 464         unsigned int segment_size = 0;
 465         Scsi_Device *SDpnt = q->queuedata;
 466         struct Scsi_Host *SHpnt = SDpnt->host;
 467
 468         if (max_segments > scsi_max_sg)
 469                 max_segments = scsi_max_sg;
 470
 471 #ifdef DMA_CHUNK_SIZE
 472         if (max_segments > 64)
 473                 max_segments = 64;
 474 #endif
 475
 476         if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
 477                 return 0;
 478
 479         if (!BH_PHYS_4G(bh, req->bh))
 480                 goto new_start_segment;
 481
 482         if (use_clustering) {
 483                 /*
 484                  * See if we can do this without creating another
 485                  * scatter-gather segment.  In the event that this is a
 486                  * DMA capable host, make sure that a segment doesn't span
 487                  * the DMA threshold boundary.
 488                  */
 489                 if (dma_host && bh_phys(bh) - 1 == ISA_DMA_THRESHOLD) {
 490                         goto new_start_segment;
 491                 }
 492                 if (BH_CONTIG(bh, req->bh)) {
 493 #ifdef DMA_SEGMENT_SIZE_LIMITED
 494                         if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) {
 495                                 segment_size = bh->b_size;
 496                                 count = __count_segments(req, use_clustering, dma_host, &segment_size);
 497                                 if( count != req->nr_segments ) {
 498                                         goto new_start_segment;
 499                                 }
 500                         }
 501 #endif
 502                         /*
 503                          * This one is OK.  Let it go.
 504                          */
 505                         return 1;
 506                 }
 507         }
 508  new_start_segment:
 509 #ifdef DMA_CHUNK_SIZE
 510         if (MERGEABLE_BUFFERS(bh, req->bh))
 511                 return scsi_new_mergeable(q, req, SHpnt, max_segments);
 512 #endif
 513         return scsi_new_segment(q, req, SHpnt, max_segments);
 514 }
 515
 516 /*
 517  * Function:    scsi_merge_fn_()
 518  *
 519  * Purpose:     queue merge function.
 520  *
 521  * Arguments:   q       - Queue for which we are merging request.
 522  *              req     - request into which we wish to merge.
 523  *              bh      - Block which we may wish to merge into request
 524  *
 525  * Returns:     1 if it is OK to merge the block into the request.  0
 526  *              if it is not OK.
 527  *
 528  * Lock status: io_request_lock is assumed to be held here.
 529  *
 530  * Notes:       Optimized for different cases depending upon whether
 531  *              ISA DMA is in use and whether clustering should be used.
 532  */
 533 #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA)                \
 534 static int _FUNCTION(request_queue_t * q,                               \
 535                      struct request * req,                              \
 536                      struct buffer_head * bh,                           \
 537                      int max_segments)                                  \
 538 {                                                                       \
 539     int ret;                                                            \
 540     SANITY_CHECK(req, _CLUSTER, _DMA);                                  \
 541     ret =  __scsi_ ## _BACK_FRONT ## _merge_fn(q,                       \
 542                                                req,                     \
 543                                                bh,                      \
 544                                                max_segments,            \
 545                                                _CLUSTER,                \
 546                                                _DMA);                   \
 547     return ret;                                                         \
 548 }
 549
 550 /* Version with use_clustering 0 and dma_host 1 is not necessary,
 551  * since the only use of dma_host above is protected by use_clustering.
 552  */
 553 MERGEFCT(scsi_back_merge_fn_, back, 0, 0)
 554 MERGEFCT(scsi_back_merge_fn_c, back, 1, 0)
 555 MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1)
 556
 557 MERGEFCT(scsi_front_merge_fn_, front, 0, 0)
 558 MERGEFCT(scsi_front_merge_fn_c, front, 1, 0)
 559 MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1)
 560
 561 /*
 562  * Function:    __scsi_merge_requests_fn()
 563  *
 564  * Purpose:     Prototype for queue merge function.
 565  *
 566  * Arguments:   q       - Queue for which we are merging request.
 567  *              req     - request into which we wish to merge.
 568  *              next    - 2nd request that we might want to combine with req
 569  *              use_clustering - 1 if this host wishes to use clustering
 570  *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
 571  *                      expose all of the address lines, so that DMA cannot
 572  *                      be done from an arbitrary address).
 573  *
 574  * Returns:     1 if it is OK to merge the two requests.  0
 575  *              if it is not OK.
 576  *
 577  * Lock status: io_request_lock is assumed to be held here.
 578  *
 579  * Notes:       Some drivers have limited scatter-gather table sizes, and
 580  *              thus they cannot queue an infinitely large command.  This
 581  *              function is called from ll_rw_blk before it attempts to merge
 582  *              a new block into a request to make sure that the request will
 583  *              not become too large.
 584  *
 585  *              This function is not designed to be directly called.  Instead
 586  *              it should be referenced from other functions where the
 587  *              use_clustering and dma_host parameters should be integer
 588  *              constants.  The compiler should thus be able to properly
 589  *              optimize the code, eliminating stuff that is irrelevant.
 590  *              It is more maintainable to do this way with a single function
 591  *              than to have 4 separate functions all doing roughly the
 592  *              same thing.
 593  */
 594 __inline static int __scsi_merge_requests_fn(request_queue_t * q,
 595                                              struct request *req,
 596                                              struct request *next,
 597                                              int max_segments,
 598                                              int use_clustering,
 599                                              int dma_host)
 600 {
 601         Scsi_Device *SDpnt = q->queuedata;
 602         struct Scsi_Host *SHpnt = SDpnt->host;
 603
 604         /*
 605          * First check if the either of the requests are re-queued
 606          * requests.  Can't merge them if they are.
 607          */
 608         if (req->special || next->special)
 609                 return 0;
 610
 611         if (max_segments > scsi_max_sg)
 612                 max_segments = scsi_max_sg;
 613
 614 #ifdef DMA_CHUNK_SIZE
 615         if (max_segments > 64)
 616                 max_segments = 64;
 617
 618         /* If it would not fit into prepared memory space for sg chain,
 619          * then don't allow the merge.
 620          */
 621         if (req->nr_segments + next->nr_segments - 1 > max_segments ||
 622             req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
 623                 return 0;
 624         }
 625         if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) {
 626                 return 0;
 627         }
 628 #else
 629         /*
 630          * If the two requests together are too large (even assuming that we
 631          * can merge the boundary requests into one segment, then don't
 632          * allow the merge.
 633          */
 634         if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
 635                 return 0;
 636         }
 637 #endif
 638
 639         if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors)
 640                 return 0;
 641
 642         if (!BH_PHYS_4G(req->bhtail, next->bh))
 643                 goto dont_combine;
 644
 645         /*
 646          * The main question is whether the two segments at the boundaries
 647          * would be considered one or two.
 648          */
 649         if (use_clustering) {
 650                 /*
 651                  * See if we can do this without creating another
 652                  * scatter-gather segment.  In the event that this is a
 653                  * DMA capable host, make sure that a segment doesn't span
 654                  * the DMA threshold boundary.
 655                  */
 656                 if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD)
 657                         goto dont_combine;
 658 #ifdef DMA_SEGMENT_SIZE_LIMITED
 659                 /*
 660                  * We currently can only allocate scatter-gather bounce
 661                  * buffers in chunks of PAGE_SIZE or less.
 662                  */
 663                 if (dma_host && BH_CONTIG(req->bhtail, next->bh)
 664                     && bh_phys(req->bhtail) - 1 >= ISA_DMA_THRESHOLD)
 665                 {
 666                         int segment_size = 0;
 667                         int count = 0;
 668
 669                         count = __count_segments(req, use_clustering, dma_host, &segment_size);
 670                         count += __count_segments(next, use_clustering, dma_host, &segment_size);
 671                         if( count != req->nr_segments + next->nr_segments ) {
 672                                 goto dont_combine;
 673                         }
 674                 }
 675 #endif
 676                 if (BH_CONTIG(req->bhtail, next->bh)) {
 677                         /*
 678                          * This one is OK.  Let it go.
 679                          */
 680                         req->nr_segments += next->nr_segments - 1;
 681 #ifdef DMA_CHUNK_SIZE
 682                         req->nr_hw_segments += next->nr_hw_segments - 1;
 683 #endif
 684                         return 1;
 685                 }
 686         }
 687       dont_combine:
 688 #ifdef DMA_CHUNK_SIZE
 689         if (req->nr_segments + next->nr_segments > max_segments ||
 690             req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
 691                 return 0;
 692         }
 693         /* If dynamic DMA mapping can merge last segment in req with
 694          * first segment in next, then the check for hw segments was
 695          * done above already, so we can always merge.
 696          */
 697         if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) {
 698                 req->nr_hw_segments += next->nr_hw_segments - 1;
 699         } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) {
 700                 return 0;
 701         } else {
 702                 req->nr_hw_segments += next->nr_hw_segments;
 703         }
 704         req->nr_segments += next->nr_segments;
 705         return 1;
 706 #else
 707         /*
 708          * We know that the two requests at the boundary should not be combined.
 709          * Make sure we can fix something that is the sum of the two.
 710          * A slightly stricter test than we had above.
 711          */
 712         if (req->nr_segments + next->nr_segments > max_segments ||
 713             req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
 714                 return 0;
 715         } else {
 716                 /*
 717                  * This will form the start of a new segment.  Bump the
 718                  * counter.
 719                  */
 720                 req->nr_segments += next->nr_segments;
 721                 return 1;
 722         }
 723 #endif
 724 }
 725
 726 /*
 727  * Function:    scsi_merge_requests_fn_()
 728  *
 729  * Purpose:     queue merge function.
 730  *
 731  * Arguments:   q       - Queue for which we are merging request.
 732  *              req     - request into which we wish to merge.
 733  *              bh      - Block which we may wish to merge into request
 734  *
 735  * Returns:     1 if it is OK to merge the block into the request.  0
 736  *              if it is not OK.
 737  *
 738  * Lock status: io_request_lock is assumed to be held here.
 739  *
 740  * Notes:       Optimized for different cases depending upon whether
 741  *              ISA DMA is in use and whether clustering should be used.
 742  */
 743 #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA)          \
 744 static int _FUNCTION(request_queue_t * q,               \
 745                      struct request * req,              \
 746                      struct request * next,             \
 747                      int max_segments)                  \
 748 {                                                       \
 749     int ret;                                            \
 750     SANITY_CHECK(req, _CLUSTER, _DMA);                  \
 751     ret =  __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \
 752     return ret;                                         \
 753 }
 754
 755 /* Version with use_clustering 0 and dma_host 1 is not necessary,
 756  * since the only use of dma_host above is protected by use_clustering.
 757  */
 758 MERGEREQFCT(scsi_merge_requests_fn_, 0, 0)
 759 MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0)
 760 MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1)
 761 /*
 762  * Function:    __init_io()
 763  *
 764  * Purpose:     Prototype for io initialize function.
      *              Fills in SCpnt->request_buffer, SCpnt->request_bufflen and
      *              SCpnt->use_sg from SCpnt->request, either as a
      *              scatter-gather table (use_sg != 0) or as one flat buffer
      *              (use_sg == 0).
 765  *
 766  * Arguments:   SCpnt   - Command descriptor we wish to initialize
 767  *              sg_count_valid  - 1 if the sg count in the req is valid.
      *                      If 0, the count is obtained from __count_segments().
 768  *              use_clustering - 1 if this host wishes to use clustering
 769  *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
 770  *                      expose all of the address lines, so that DMA cannot
 771  *                      be done from an arbitrary address).
 772  *
 773  * Returns:     1 on success.
      *              0 if the scatter-gather table cannot be allocated, or if
      *              the single-segment path is reached on a host that has
      *              highmem_io set.
 774  *
 775  * Lock status:
 776  *
 777  * Notes:       Only the SCpnt argument should be a non-constant variable.
 778  *              This function is designed in such a way that it will be
 779  *              invoked from a series of small stubs, each of which would
 780  *              be optimized for specific circumstances.
 781  *
 782  *              The advantage of this is that hosts that don't do DMA
 783  *              get versions of the function that essentially don't have
 784  *              any of the DMA code.  Same goes for clustering - in the
 785  *              case of hosts with no need for clustering, there is no point
 786  *              in a whole bunch of overhead.
 787  *
 788  *              Finally, in the event that a host has set can_queue to SG_ALL
 789  *              implying that there is no limit to the length of a scatter
 790  *              gather list, the sg count in the request won't be valid
 791  *              (mainly because we don't need queue management functions
 792  *              which keep the tally uptodate).
      *              NOTE(review): "can_queue" above presumably means the
      *              host's sg_tablesize - confirm.
 793  */
 794 __inline static int __init_io(Scsi_Cmnd * SCpnt,
 795                               int sg_count_valid,
 796                               int use_clustering,
 797                               int dma_host)
 798 {
 799         struct buffer_head * bh;
 800         struct buffer_head * bhprev;     /* buffer handled in the previous iteration */
 801         char               * buff;
 802         int                  count;      /* segment count, later next free sg slot */
 803         int                  i;
 804         struct request     * req = &SCpnt->request;
 805         int                  sectors;    /* length of one sg entry in 512-byte units */
 806         struct scatterlist * sgpnt;
 807         int                  this_count; /* sectors covered by the single-segment path */
 808         void               ** bbpnt;     /* original addresses of bounced sg entries */
 809
 810         /*
 811          * First we need to know how many scatter gather segments are needed.
 812          */
 813         if (!sg_count_valid) {
 814                 count = __count_segments(req, use_clustering, dma_host, NULL);
 815         } else {
 816                 count = req->nr_segments;
 817         }
 818
 819         /*
 820          * If the dma pool is nearly empty, then queue a minimal request
 821          * with a single segment.  Typically this will satisfy a single
 822          * buffer.
 823          */
 824         if (dma_host && scsi_dma_free_sectors <= 10) {
 825                 this_count = req->current_nr_sectors;
 826                 goto single_segment;
 827         }
 828         /*
 829          * we really want to use sg even for a single segment request,
 830          * however some people just cannot be bothered to write decent
 831          * driver code so we can't risk to break somebody making the
 832          * assumption that sg requests will always contain at least 2
 833          * segments. if the driver is 32-bit dma safe, then use sg for
 834          * 1 entry anyways. if not, don't rely on the driver handling this
 835          * case.
 836          */
 837         if (count == 1 && !SCpnt->host->highmem_io) {
 838                 this_count = req->nr_sectors;
 839                 goto single_segment;
 840         }
 841
 842         /*
 843          * for sane drivers, use sg even for 1 entry request
 844          */
 845         SCpnt->use_sg = count;
 846         SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist));
 847
 848         /* If we could potentially require ISA bounce buffers, allocate
 849          * space for this array here.
              * It is one void pointer per table entry, placed in the same
              * allocation as the table itself.
 850          */
 851         if (dma_host)
 852                 SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *));
 853
 854         /* scsi_malloc can only allocate in chunks of 512 bytes so
 855          * round it up.
 856          */
 857         SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511;
 858
 859         sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len);
 860
 861         /*
 862          * Now fill the scatter-gather table.
              * (Unless the allocation failed, in which case we give up with
              * a return value of 0; use_sg and sglist_len keep the values
              * assigned above.)
 863          */
 864         if (!sgpnt) {
 865 #if 0
 866                 /*
 867                  * If we cannot allocate the scatter-gather table, then
 868                  * simply write the first buffer all by itself.
 869                  */
 870                 printk("Warning - running *really* short on DMA buffers\n");
 871                 this_count = req->current_nr_sectors;
 872                 goto single_segment;
 873 #else
 874                 /*
 875                  * it's probably better to simply always back off a little,
 876                  * and let some memory be returned to dma pool instead of
 877                  * always falling back to (slow) single segments
 878                  */
 879                 return 0;
 880 #endif
 881         }
 882
 883         /*
 884          * Next, walk the list, and fill in the addresses and sizes of
 885          * each segment.
 886          */
 887         memset(sgpnt, 0, SCpnt->sglist_len);
 888         SCpnt->request_buffer = (char *) sgpnt;
 889         SCpnt->request_bufflen = 0;
 890         bhprev = NULL;
 891
             /*
              * The array of saved addresses starts directly behind the
              * use_sg scatterlist entries.
              */
 892         if (dma_host)
 893                 bbpnt = (void **) ((char *)sgpnt +
 894                          (SCpnt->use_sg * sizeof(struct scatterlist)));
 895         else
 896                 bbpnt = NULL;
 897
 898         SCpnt->bounce_buffers = bbpnt;
 899
             /*
              * count restarts at 0 and is the index of the next unused table
              * entry.  A buffer either extends entry count - 1 (clustering,
              * the "continue" paths) or starts entry count.
              */
 900         for (count = 0, bh = req->bh; bh; bh = bh->b_reqnext) {
 901                 if (use_clustering && bhprev != NULL) {
 902                         if (dma_host && bh_phys(bhprev) - 1 == ISA_DMA_THRESHOLD) {
 903                                 /* Nothing - fall through */
 904                         } else if (blk_seg_merge_ok(bhprev, bh)) {
 905                                 /*
 906                                  * This one is OK.  Let it go.  Note that we
 907                                  * do not have the ability to allocate
 908                                  * bounce buffer segments > PAGE_SIZE, so
 909                                  * for now we limit the thing.
                                      *
                                      * The dma_host branches do not touch
                                      * request_bufflen; for dma_host it is
                                      * recomputed from the finished table
                                      * further down.  If the size-limited
                                      * test fails, bh starts a new entry.
 910                                  */
 911                                 if( dma_host ) {
 912 #ifdef DMA_SEGMENT_SIZE_LIMITED
 913                                         if (bh_phys(bh) - 1 < ISA_DMA_THRESHOLD
 914                                             || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
 915                                                 sgpnt[count - 1].length += bh->b_size;
 916                                                 bhprev = bh;
 917                                                 continue;
 918                                         }
 919 #else
 920                                         sgpnt[count - 1].length += bh->b_size;
 921                                         bhprev = bh;
 922                                         continue;
 923 #endif
 924                                 } else {
 925                                         sgpnt[count - 1].length += bh->b_size;
 926                                         SCpnt->request_bufflen += bh->b_size;
 927                                         bhprev = bh;
 928                                         continue;
 929                                 }
 930                         }
 931                 }
 932
                     /*
                      * Start a new entry.  Hosts with highmem_io get the
                      * buffer described by page and offset; all others get
                      * its virtual address, and a highmem page is a BUG().
                      */
 933                 if (SCpnt->host->highmem_io) {
 934                         sgpnt[count].page = bh->b_page;
 935                         sgpnt[count].offset = bh_offset(bh);
 936                         sgpnt[count].address = NULL;
 937                 } else {
 938                         if (PageHighMem(bh->b_page))
 939                                 BUG();
 940
 941                         sgpnt[count].page = NULL;
 942                         sgpnt[count].address = bh->b_data;
 943                 }
 944
 945                 sgpnt[count].length = bh->b_size;
 946
 947                 if (!dma_host)
 948                         SCpnt->request_bufflen += bh->b_size;
 949
 950                 count++;
 951                 bhprev = bh;
 952         }
 953
 954         /*
 955          * Verify that the count is correct.
              * A mismatch is only reported; processing continues with the
              * table as built.
 956          */
 957         if (count != SCpnt->use_sg) {
 958                 printk("Incorrect number of segments after building list\n");
 959 #ifdef CONFIG_SCSI_DEBUG_QUEUES
 960                 dump_stats(req, use_clustering, dma_host, count);
 961 #endif
 962         }
 963         if (!dma_host) {
 964                 return 1;
 965         }
 966         /*
 967          * Now allocate bounce buffers, if needed.
              * request_bufflen is rebuilt here as the sum of the entries
              * that are actually kept.
 968          */
 969         SCpnt->request_bufflen = 0;
 970         for (i = 0; i < count; i++) {
 971                 sectors = (sgpnt[i].length >> 9);
 972                 SCpnt->request_bufflen += sgpnt[i].length;
 973                 /*
 974                  * only done for dma_host, in which case .page is not
 975                  * set since it's guaranteed to be a low memory page
 976                  */
 977                 if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 >
 978                     ISA_DMA_THRESHOLD) {
 979                         if( scsi_dma_free_sectors - sectors <= 10  ) {
 980                                 /*
 981                                  * If this would nearly drain the DMA
 982                                  * pool empty, then let's stop here.
 983                                  * Don't make this request any larger.
 984                                  * This is kind of a safety valve that
 985                                  * we use - we could get screwed later
 986                                  * on if we run out completely.
                                      *
                                      * Entry i is dropped again and the table
                                      * is cut to the i entries before it.
 987                                  */
 988                                 SCpnt->request_bufflen -= sgpnt[i].length;
 989                                 SCpnt->use_sg = i;
 990                                 if (i == 0) {
 991                                         goto big_trouble;
 992                                 }
 993                                 break;
 994                         }
 995
                             /*
                              * Remember the original address in bbpnt[i] and
                              * point the entry at a buffer from scsi_malloc().
                              */
 996                         bbpnt[i] = sgpnt[i].address;
 997                         sgpnt[i].address =
 998                             (char *) scsi_malloc(sgpnt[i].length);
 999                         /*
1000                          * If we cannot allocate memory for this DMA bounce
1001                          * buffer, then queue just what we have done so far.
1002                          */
1003                         if (sgpnt[i].address == NULL) {
1004                                 printk("Warning - running low on DMA memory\n");
1005                                 SCpnt->request_bufflen -= sgpnt[i].length;
1006                                 SCpnt->use_sg = i;
1007                                 if (i == 0) {
1008                                         goto big_trouble;
1009                                 }
1010                                 break;
1011                         }
                             /*
                              * For a WRITE, copy the data from the original
                              * buffer into the bounce buffer.
                              */
1012                         if (req->cmd == WRITE) {
1013                                 memcpy(sgpnt[i].address, bbpnt[i],
1014                                        sgpnt[i].length);
1015                         }
1016                 }
1017         }
1018         return 1;
1019
1020       big_trouble:
1021         /*
1022          * We come here in the event that we get one humongous
1023          * request, where we need a bounce buffer, and the buffer is
1024          * more than we can allocate in a single call to
1025          * scsi_malloc().  In addition, we only come here when it is
1026          * the 0th element of the scatter-gather table that gets us
1027          * into this trouble.  As a fallback, we fall back to
1028          * non-scatter-gather, and ask for a single segment.  We make
1029          * a half-hearted attempt to pick a reasonably large request
1030          * size mainly so that we don't thrash the thing with
1031          * iddy-biddy requests.
1032          */
1033
1034         /*
1035          * The original number of sectors in the 0th element of the
1036          * scatter-gather table.
1037          */
1038         sectors = sgpnt[0].length >> 9;
1039
1040         /*
1041          * Free up the original scatter-gather table.  Note that since
1042          * it was the 0th element that got us here, we don't have to
1043          * go in and free up memory from the other slots.
              *
              * NOTE(review): SCpnt->bounce_buffers is not reset here and
              * still points into the table that is being freed - confirm
              * that nothing reads it while use_sg is 0.
1044          */
1045         SCpnt->request_bufflen = 0;
1046         SCpnt->use_sg = 0;
1047         scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
1048
1049         /*
1050          * Make an attempt to pick up as much as we reasonably can.
1051          * Just keep adding sectors until the pool starts running kind of
1052          * low.  The limit of 30 is somewhat arbitrary - the point is that
1053          * it would kind of suck if we dropped down and limited ourselves to
1054          * single-block requests if we had hundreds of free sectors.
              *
              * The walk also stops once this_count equals the size of the
              * former 0th table entry.
1055          */
1056         if( scsi_dma_free_sectors > 30 ) {
1057                 for (this_count = 0, bh = req->bh; bh; bh = bh->b_reqnext) {
1058                         if( scsi_dma_free_sectors - this_count < 30
1059                             || this_count == sectors )
1060                         {
1061                                 break;
1062                         }
1063                         this_count += bh->b_size >> 9;
1064                 }
1065
1066         } else {
1067                 /*
1068                  * Yow!   Take the absolute minimum here.
1069                  */
1070                 this_count = req->current_nr_sectors;
1071         }
1072
1073         /*
1074          * Now drop through into the single-segment case.
1075          */
1076
1077 single_segment:
1078         /*
1079          * for highmem cases, we have to revert to bouncing for single
1080          * segments. rather just give up now and let the device starvation
1081          * path reinitiate this i/o later
1082          */
1083         if (SCpnt->host->highmem_io)
1084                 return 0;
1085
1086         /*
1087          * Come here if for any reason we choose to do this as a single
1088          * segment.  Possibly the entire request, or possibly a small
1089          * chunk of the entire request.
              * this_count holds the number of sectors to cover.
1090          */
1091         bh = req->bh;
1092         buff = req->buffer = bh->b_data;
1093
1094         if (PageHighMem(bh->b_page))
1095                 BUG();
1096
1097         if (dma_host) {
1098                 /*
1099                  * Allocate a DMA bounce buffer.  If the allocation fails, fall
1100                  * back and allocate a really small one - enough to satisfy
1101                  * the first buffer.
1102                  */
1103                 if (bh_phys(bh) + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) {
1104                         buff = (char *) scsi_malloc(this_count << 9);
1105                         if (!buff) {
1106                                 printk("Warning - running low on DMA memory\n");
1107                                 this_count = req->current_nr_sectors;
1108                                 buff = (char *) scsi_malloc(this_count << 9);
1109                                 if (!buff) {
                                             /*
                                              * NOTE(review): nothing here leaves
                                              * the function.  If dma_exhausted()
                                              * returns, buff is still NULL for the
                                              * memcpy and the assignments below.
                                              * Presumably it does not return -
                                              * confirm.
                                              */
1110                                         dma_exhausted(SCpnt, 0);
1111                                 }
1112                         }
                             /* For a WRITE, fill the bounce buffer from the request. */
1113                         if (req->cmd == WRITE)
1114                                 memcpy(buff, (char *) req->buffer, this_count << 9);
1115                 }
1116         }
1117
             /* Flat buffer: this_count sectors at buff, no scatter-gather. */
1118         SCpnt->request_bufflen = this_count << 9;
1119         SCpnt->request_buffer = buff;
1120         SCpnt->use_sg = 0;
1121         return 1;
1122 }
1123
1124 #define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA)       \
1125 static int _FUNCTION(Scsi_Cmnd * SCpnt)                 \
1126 {                                                       \
1127     return __init_io(SCpnt, _VALID, _CLUSTER, _DMA);    \
1128 }
1129
1130 /*
1131  * ll_rw_blk.c now keeps track of the number of segments in
1132  * a request.  Thus we don't have to do it any more here.
1133  * We always force "_VALID" to 1.  Eventually clean this up
1134  * and get rid of the extra argument.
1135  */
1136 INITIO(scsi_init_io_v, 1, 0, 0)
1137 INITIO(scsi_init_io_vd, 1, 0, 1)
1138 INITIO(scsi_init_io_vc, 1, 1, 0)
1139 INITIO(scsi_init_io_vdc, 1, 1, 1)
1140
1141 /*
1142  * Function:    initialize_merge_fn()
1143  *
1144  * Purpose:     Initialize merge function for a host
1145  *
1146  * Arguments:   SHpnt   - Host descriptor.
1147  *
1148  * Returns:     Nothing.
1149  *
1150  * Lock status:
1151  *
1152  * Notes:
1153  */
1154 void initialize_merge_fn(Scsi_Device * SDpnt)
1155 {
1156         struct Scsi_Host *SHpnt = SDpnt->host;
1157         request_queue_t *q = &SDpnt->request_queue;
1158         dma64_addr_t bounce_limit;
1159
1160         /*
1161          * If this host has an unlimited tablesize, then don't bother with a
1162          * merge manager.  The whole point of the operation is to make sure
1163          * that requests don't grow too large, and this host isn't picky.
1164          *
1165          * Note that ll_rw_blk.c is effectively maintaining a segment
1166          * count which is only valid if clustering is used, and it obviously
1167          * doesn't handle the DMA case.   In the end, it
1168          * is simply easier to do it ourselves with our own functions
1169          * rather than rely upon the default behavior of ll_rw_blk.
1170          */
1171         if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
1172                 q->back_merge_fn = scsi_back_merge_fn_;
1173                 q->front_merge_fn = scsi_front_merge_fn_;
1174                 q->merge_requests_fn = scsi_merge_requests_fn_;
1175                 SDpnt->scsi_init_io_fn = scsi_init_io_v;
1176         } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
1177                 q->back_merge_fn = scsi_back_merge_fn_;
1178                 q->front_merge_fn = scsi_front_merge_fn_;
1179                 q->merge_requests_fn = scsi_merge_requests_fn_;
1180                 SDpnt->scsi_init_io_fn = scsi_init_io_vd;
1181         } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
1182                 q->back_merge_fn = scsi_back_merge_fn_c;
1183                 q->front_merge_fn = scsi_front_merge_fn_c;
1184                 q->merge_requests_fn = scsi_merge_requests_fn_c;
1185                 SDpnt->scsi_init_io_fn = scsi_init_io_vc;
1186         } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
1187                 q->back_merge_fn = scsi_back_merge_fn_dc;
1188                 q->front_merge_fn = scsi_front_merge_fn_dc;
1189                 q->merge_requests_fn = scsi_merge_requests_fn_dc;
1190                 SDpnt->scsi_init_io_fn = scsi_init_io_vdc;
1191         }
1192
1193         /*
1194          * now enable highmem I/O, if appropriate
1195          */
1196         bounce_limit = BLK_BOUNCE_HIGH;
1197         if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) {
1198                 if (!PCI_DMA_BUS_IS_PHYS)
1199                         /* Platforms with virtual-DMA translation
1200                          * hardware have no practical limit.
1201                          */
1202                         bounce_limit = BLK_BOUNCE_ANY;
1203                 else
1204                         bounce_limit = SHpnt->pci_dev->dma_mask;
1205         }
1206
1207         blk_queue_bounce_limit(q, bounce_limit);
1208 }