special usb hub handling, IDE disks, and retries all over the place
[linux-2.4.git] / drivers / ide / raid / pdcraid.c
1 /*
2    pdcraid.c  Copyright (C) 2001 Red Hat, Inc. All rights reserved.
3
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8    
9    You should have received a copy of the GNU General Public License
10    (for example /usr/src/linux/COPYING); if not, write to the Free
11    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
12    
13    Authors:     Arjan van de Ven <arjanv@redhat.com>
14                 
15    Based on work done by Søren Schmidt for FreeBSD  
16
17 */
18
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/kernel.h>
22 #include <linux/sched.h>
23 #include <linux/smp_lock.h>
24 #include <linux/blkdev.h>
25 #include <linux/blkpg.h>
26 #include <linux/genhd.h>
27 #include <linux/ioctl.h>
28
29 #include <linux/ide.h>
30 #include <asm/uaccess.h>
31
32 #include "ataraid.h"
33
34 static int pdcraid_open(struct inode * inode, struct file * filp);
35 static int pdcraid_release(struct inode * inode, struct file * filp);
36 static int pdcraid_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
37 static int pdcraid0_make_request (request_queue_t *q, int rw, struct buffer_head * bh);
38 static int pdcraid1_make_request (request_queue_t *q, int rw, struct buffer_head * bh);
39
/* One candidate disk that is probed for a Promise superblock. */
struct disk_dev {
	int major;	/* block major of the candidate */
	int minor;	/* block minor of the candidate */
	int device;	/* raid[] index it was assigned to, -1 while unassigned */
};
45
46 static struct disk_dev devlist[]= {
47         {IDE0_MAJOR,  0,  -1 },
48         {IDE0_MAJOR, 64,  -1 },
49         {IDE1_MAJOR,  0,  -1 },
50         {IDE1_MAJOR, 64,  -1 },
51         {IDE2_MAJOR,  0,  -1 },
52         {IDE2_MAJOR, 64,  -1 },
53         {IDE3_MAJOR,  0,  -1 },
54         {IDE3_MAJOR, 64,  -1 },
55         {IDE4_MAJOR,  0,  -1 },
56         {IDE4_MAJOR, 64,  -1 },
57         {IDE5_MAJOR,  0,  -1 },
58         {IDE5_MAJOR, 64,  -1 },
59         {IDE6_MAJOR,  0,  -1 },
60         {IDE6_MAJOR, 64,  -1 },
61 };
62
63
64 struct pdcdisk {
65         kdev_t  device;
66         unsigned long sectors;
67         struct block_device *bdev;
68         unsigned long last_pos;
69 };
70
71 struct pdcraid {
72         unsigned int stride;
73         unsigned int disks;
74         unsigned long sectors;
75         struct geom geom;
76         
77         struct pdcdisk disk[8];
78         
79         unsigned long cutoff[8];
80         unsigned int cutoff_disks[8];
81 };
82
83 static struct raid_device_operations pdcraid0_ops = {
84         open:                   pdcraid_open,
85         release:                pdcraid_release,
86         ioctl:                  pdcraid_ioctl,
87         make_request:           pdcraid0_make_request
88 };
89
90 static struct raid_device_operations pdcraid1_ops = {
91         open:                   pdcraid_open,
92         release:                pdcraid_release,
93         ioctl:                  pdcraid_ioctl,
94         make_request:           pdcraid1_make_request
95 };
96
97 static struct pdcraid raid[16];
98
99
100 static int pdcraid_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
101 {
102         unsigned int minor;
103         unsigned long sectors;
104
105         if (!inode || !inode->i_rdev) 
106                 return -EINVAL;
107
108         minor = MINOR(inode->i_rdev)>>SHIFT;
109         
110         switch (cmd) {
111
112                 case BLKGETSIZE:   /* Return device size */
113                         if (!arg)  return -EINVAL;
114                         sectors = ataraid_gendisk.part[MINOR(inode->i_rdev)].nr_sects;
115                         if (MINOR(inode->i_rdev)&15)
116                                 return put_user(sectors, (unsigned long *) arg);
117                         return put_user(raid[minor].sectors , (unsigned long *) arg);
118                         break;
119                         
120
121                 case HDIO_GETGEO:
122                 {
123                         struct hd_geometry *loc = (struct hd_geometry *) arg;
124                         unsigned short bios_cyl = raid[minor].geom.cylinders; /* truncate */
125                         
126                         if (!loc) return -EINVAL;
127                         if (put_user(raid[minor].geom.heads, (byte *) &loc->heads)) return -EFAULT;
128                         if (put_user(raid[minor].geom.sectors, (byte *) &loc->sectors)) return -EFAULT;
129                         if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
130                         if (put_user((unsigned)ataraid_gendisk.part[MINOR(inode->i_rdev)].start_sect,
131                                 (unsigned long *) &loc->start)) return -EFAULT;
132                         return 0;
133                 }
134
135                 case HDIO_GETGEO_BIG:
136                 {
137                         struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
138                         if (!loc) return -EINVAL;
139                         if (put_user(raid[minor].geom.heads, (byte *) &loc->heads)) return -EFAULT;
140                         if (put_user(raid[minor].geom.sectors, (byte *) &loc->sectors)) return -EFAULT;
141                         if (put_user(raid[minor].geom.cylinders, (unsigned int *) &loc->cylinders)) return -EFAULT;
142                         if (put_user((unsigned)ataraid_gendisk.part[MINOR(inode->i_rdev)].start_sect,
143                                 (unsigned long *) &loc->start)) return -EFAULT;
144                         return 0;
145                 }
146
147                 default:
148                         return blk_ioctl(inode->i_rdev, cmd, arg);
149         };
150
151         return 0;
152 }
153
154
/*
 * Translate a partition-relative sector into an array-relative one by
 * adding the partition's start offset.  partition_size and stride are
 * accepted but not used.
 */
static unsigned long partition_map_normal(unsigned long block, unsigned long partition_off, unsigned long partition_size, int stride)
{
	unsigned long mapped = partition_off;

	mapped += block;
	return mapped;
}
159
160 static int pdcraid0_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
161 {
162         unsigned long rsect;
163         unsigned long rsect_left,rsect_accum = 0;
164         unsigned long block;
165         unsigned int disk=0,real_disk=0;
166         int i;
167         int device;
168         struct pdcraid *thisraid;
169
170         rsect = bh->b_rsector;
171         
172         /* Ok. We need to modify this sector number to a new disk + new sector number. 
173          * If there are disks of different sizes, this gets tricky. 
174          * Example with 3 disks (1Gb, 4Gb and 5 GB):
175          * The first 3 Gb of the "RAID" are evenly spread over the 3 disks.
176          * Then things get interesting. The next 2Gb (RAID view) are spread across disk 2 and 3
177          * and the last 1Gb is disk 3 only.
178          *
179          * the way this is solved is like this: We have a list of "cutoff" points where everytime
180          * a disk falls out of the "higher" count, we mark the max sector. So once we pass a cutoff
181          * point, we have to divide by one less.
182          */
183         
184         device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
185         thisraid = &raid[device];
186         if (thisraid->stride==0)
187                 thisraid->stride=1;
188
189         /* Partitions need adding of the start sector of the partition to the requested sector */
190         
191         rsect = partition_map_normal(rsect, ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect, ataraid_gendisk.part[MINOR(bh->b_rdev)].nr_sects, thisraid->stride);
192
193         /* Woops we need to split the request to avoid crossing a stride barrier */
194         if ((rsect/thisraid->stride) != ((rsect+(bh->b_size/512)-1)/thisraid->stride)) {
195                 return -1;  
196         }
197         
198         rsect_left = rsect;
199         
200         for (i=0;i<8;i++) {
201                 if (thisraid->cutoff_disks[i]==0)
202                         break;
203                 if (rsect > thisraid->cutoff[i]) {
204                         /* we're in the wrong area so far */
205                         rsect_left -= thisraid->cutoff[i];
206                         rsect_accum += thisraid->cutoff[i]/thisraid->cutoff_disks[i];
207                 } else {
208                         block = rsect_left / thisraid->stride;
209                         disk = block % thisraid->cutoff_disks[i];
210                         block = (block / thisraid->cutoff_disks[i]) * thisraid->stride;
211                         rsect = rsect_accum + (rsect_left % thisraid->stride) + block;
212                         break;
213                 }
214         }
215         
216         for (i=0;i<8;i++) {
217                 if ((disk==0) && (thisraid->disk[i].sectors > rsect_accum)) {
218                         real_disk = i;
219                         break;
220                 }
221                 if ((disk>0) && (thisraid->disk[i].sectors >= rsect_accum)) {
222                         disk--;
223                 }
224                 
225         }
226         disk = real_disk;
227                 
228         
229         /*
230          * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
231          * is the only IO operation happening on this bh.
232          */
233         bh->b_rdev = thisraid->disk[disk].device;
234         bh->b_rsector = rsect;
235
236         /*
237          * Let the main block layer submit the IO and resolve recursion:
238          */
239         return 1;
240 }
241
242 static int pdcraid1_write_request(request_queue_t *q, int rw, struct buffer_head * bh)
243 {
244         struct buffer_head *bh1;
245         struct ataraid_bh_private *private;
246         int device;
247         int i;
248
249         device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
250         private = ataraid_get_private();
251         if (private==NULL)
252                 BUG();
253
254         private->parent = bh;
255         
256         atomic_set(&private->count,raid[device].disks);
257
258
259         for (i = 0; i< raid[device].disks; i++) { 
260                 bh1=ataraid_get_bhead();
261                 /* If this ever fails we're doomed */
262                 if (!bh1)
263                         BUG();
264         
265                 /* dupe the bufferhead and update the parts that need to be different */
266                 memcpy(bh1, bh, sizeof(*bh));
267                 
268                 bh1->b_end_io = ataraid_end_request;
269                 bh1->b_private = private;
270                 bh1->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect; /* partition offset */
271                 bh1->b_rdev = raid[device].disk[i].device;
272
273                 /* update the last known head position for the drive */
274                 raid[device].disk[i].last_pos = bh1->b_rsector+(bh1->b_size>>9);
275
276                 generic_make_request(rw,bh1);
277         }
278         return 0;
279 }
280
281 static int pdcraid1_read_request (request_queue_t *q, int rw, struct buffer_head * bh)
282 {
283         int device;
284         int dist;
285         int bestsofar,bestdist,i;
286         static int previous;
287
288         /* Reads are simple in principle. Pick a disk and go. 
289            Initially I cheat by just picking the one which the last known
290            head position is closest by.
291            Later on, online/offline checking and performance needs adding */
292         
293         device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
294         bh->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect;
295
296         bestsofar = 0; 
297         bestdist = raid[device].disk[0].last_pos - bh->b_rsector;
298         if (bestdist<0) 
299                 bestdist=-bestdist;
300         if (bestdist>4095)
301                 bestdist=4095;
302
303         for (i=1 ; i<raid[device].disks; i++) {
304                 dist = raid[device].disk[i].last_pos - bh->b_rsector;
305                 if (dist<0) 
306                         dist = -dist;
307                 if (dist>4095)
308                         dist=4095;
309                 
310                 if (bestdist==dist) {  /* it's a tie; try to do some read balancing */
311                         if ((previous>bestsofar)&&(previous<=i))  
312                                 bestsofar = i;
313                         previous = (previous + 1) % raid[device].disks;
314                 } else if (bestdist>dist) {
315                         bestdist = dist;
316                         bestsofar = i;
317                 }
318         
319         }
320         
321         bh->b_rdev = raid[device].disk[bestsofar].device; 
322         raid[device].disk[bestsofar].last_pos = bh->b_rsector+(bh->b_size>>9);
323
324         /*
325          * Let the main block layer submit the IO and resolve recursion:
326          */
327                                 
328         return 1;
329 }
330
331
332 static int pdcraid1_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
333 {
334         /* Read and Write are totally different cases; split them totally here */
335         if (rw==READA)
336                 rw = READ;
337         
338         if (rw==READ)
339                 return pdcraid1_read_request(q,rw,bh);
340         else
341                 return pdcraid1_write_request(q,rw,bh);
342 }
343
344 #include "pdcraid.h"
345
346 static unsigned long calc_pdcblock_offset (int major,int minor)
347 {
348         unsigned long lba = 0;
349         kdev_t dev;
350         ide_drive_t *ideinfo;
351         
352         dev = MKDEV(major,minor);
353         ideinfo = ide_info_ptr (dev, 0);
354         if (ideinfo==NULL)
355                 return 0;
356         
357         
358         /* first sector of the last cluster */
359         if (ideinfo->head==0) 
360                 return 0;
361         if (ideinfo->sect==0)
362                 return 0;
363         if (ideinfo->head!=255) {
364                 lba = (ideinfo->capacity / (ideinfo->head*ideinfo->sect));
365                 lba = lba * (ideinfo->head*ideinfo->sect);
366                 lba = lba - ideinfo->sect; }
367         else {
368                 lba = ideinfo->capacity - ideinfo->sect;
369         }
370
371         return lba;
372 }
373
374
375 static int read_disk_sb (int major, int minor, unsigned char *buffer,int bufsize)
376 {
377         int ret = -EINVAL;
378         struct buffer_head *bh = NULL;
379         kdev_t dev = MKDEV(major,minor);
380         unsigned long sb_offset;
381
382         if (blksize_size[major]==NULL)   /* device doesn't exist */
383                 return -EINVAL;
384                        
385         
386         /*
387          * Calculate the position of the superblock,
388          * it's at first sector of the last cylinder
389          */
390         sb_offset = calc_pdcblock_offset(major,minor)/8;
391         /* The /8 transforms sectors into 4Kb blocks */
392
393         if (sb_offset==0)
394                 return -1;      
395         
396         set_blocksize (dev, 4096);
397
398         bh = bread (dev, sb_offset, 4096);
399         
400         if (bh) {
401                 memcpy (buffer, bh->b_data, bufsize);
402         } else {
403                 printk(KERN_ERR "pdcraid: Error reading superblock.\n");
404                 goto abort;
405         }
406         ret = 0;
407 abort:
408         if (bh)
409                 brelse (bh);
410         return ret;
411 }
412
/*
 * Superblock checksum: the wrapping sum of the first 511 unsigned ints
 * at ptr.  The caller compares it with the checksum field stored in the
 * superblock.
 */
static unsigned int calc_sb_csum (unsigned int* ptr)
{
	unsigned int *end = ptr + 511;
	unsigned int total = 0;

	while (ptr != end)
		total += *ptr++;

	return total;
}
424
/*
 * magic_1 value of the array currently being assembled; 0 while no
 * member has been accepted yet.  Reset before every assembly attempt.
 */
static int cookie;
426
427 static void __init probedisk(int devindex,int device, int raidlevel)
428 {
429         int i;
430         int major, minor;
431         struct promise_raid_conf *prom;
432         static unsigned char block[4096];
433         struct block_device *bdev;
434
435         if (devlist[devindex].device!=-1) /* already assigned to another array */
436                 return;
437         
438         major = devlist[devindex].major;
439         minor = devlist[devindex].minor; 
440
441         if (read_disk_sb(major,minor,(unsigned char*)&block,sizeof(block)))
442                 return;
443                                                                                                                  
444         prom = (struct promise_raid_conf*)&block[512];
445
446         /* the checksums must match */
447         if (prom->checksum != calc_sb_csum((unsigned int*)prom))
448                 return;
449         if (prom->raid.type!=raidlevel) /* different raidlevel */
450                 return;
451
452         if ((cookie!=0) && (cookie != prom->raid.magic_1)) /* different array */
453                 return;
454         
455         cookie = prom->raid.magic_1;
456
457         /* This looks evil. But basically, we have to search for our adapternumber
458            in the arraydefinition, both of which are in the superblock */       
459         for (i=0;(i<prom->raid.total_disks)&&(i<8);i++) {
460                 if ( (prom->raid.disk[i].channel== prom->raid.channel) &&
461                      (prom->raid.disk[i].device == prom->raid.device) ) {
462
463                         bdev = bdget(MKDEV(major,minor));
464                         if (bdev && blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW) == 0) {
465                                 raid[device].disk[i].bdev = bdev;
466                         }
467                         raid[device].disk[i].device = MKDEV(major,minor);
468                         raid[device].disk[i].sectors = prom->raid.disk_secs;
469                         raid[device].stride = (1<<prom->raid.raid0_shift);
470                         raid[device].disks = prom->raid.total_disks;
471                         raid[device].sectors = prom->raid.total_secs;
472                         raid[device].geom.heads = prom->raid.heads+1;
473                         raid[device].geom.sectors = prom->raid.sectors;
474                         raid[device].geom.cylinders = prom->raid.cylinders+1;
475                         devlist[devindex].device=device;
476                      }
477         }
478                        
479 }
480
481 static void __init fill_cutoff(int device)
482 {
483         int i,j;
484         unsigned long smallest;
485         unsigned long bar;
486         int count;
487         
488         bar = 0;
489         for (i=0;i<8;i++) {
490                 smallest = ~0;
491                 for (j=0;j<8;j++) 
492                         if ((raid[device].disk[j].sectors < smallest) && (raid[device].disk[j].sectors>bar))
493                                 smallest = raid[device].disk[j].sectors;
494                 count = 0;
495                 for (j=0;j<8;j++) 
496                         if (raid[device].disk[j].sectors >= smallest)
497                                 count++;
498                                 
499                 smallest = smallest * count;
500                 bar = smallest;
501                 raid[device].cutoff[i] = smallest;
502                 raid[device].cutoff_disks[i] = count;
503         }
504 }
505                            
506 static __init int pdcraid_init_one(int device,int raidlevel)
507 {
508         int i, count;
509
510         for (i=0; i<14; i++)
511                 probedisk(i, device, raidlevel);
512         
513         if (raidlevel==0)
514                 fill_cutoff(device);
515         
516         /* Initialize the gendisk structure */
517         
518         ataraid_register_disk(device,raid[device].sectors);        
519                 
520         count=0;
521         
522         for (i=0;i<8;i++) {
523                 if (raid[device].disk[i].device!=0) {
524                         printk(KERN_INFO "Drive %i is %li Mb (%i / %i) \n",
525                                 i,raid[device].disk[i].sectors/2048,MAJOR(raid[device].disk[i].device),MINOR(raid[device].disk[i].device));
526                         count++;
527                 }
528         }
529         if (count) {
530                 printk(KERN_INFO "Raid%i array consists of %i drives. \n",raidlevel,count);
531                 return 0;
532         } else {
533                 return -ENODEV;
534         }
535 }
536
537 static __init int pdcraid_init(void)
538 {
539         int retval, device, count = 0;
540
541         do {
542                 cookie = 0;
543                 device=ataraid_get_device(&pdcraid0_ops);
544                 if (device<0)
545                         break;
546                 retval = pdcraid_init_one(device,0);
547                 if (retval) {
548                         ataraid_release_device(device);
549                         break;
550                 } else {
551                         count++;
552                 }
553         } while (1);
554
555         do {
556         
557                 cookie = 0;
558                 device=ataraid_get_device(&pdcraid1_ops);
559                 if (device<0)
560                         break;
561                 retval = pdcraid_init_one(device,1);
562                 if (retval) {
563                         ataraid_release_device(device);
564                         break;
565                 } else {
566                         count++;
567                 }
568         } while (1);
569
570         if (count) {
571                 printk(KERN_INFO "Promise Fasttrak(tm) Softwareraid driver for linux version 0.03beta\n");
572                 return 0;
573         }
574         printk(KERN_DEBUG "Promise Fasttrak(tm) Softwareraid driver 0.03beta: No raid array found\n");
575         return -ENODEV;
576 }
577
578 static void __exit pdcraid_exit (void)
579 {
580         int i,device;
581         for (device = 0; device<16; device++) {
582                 for (i=0;i<8;i++) {
583                         struct block_device *bdev = raid[device].disk[i].bdev;
584                         raid[device].disk[i].bdev = NULL;
585                         if (bdev)
586                                 blkdev_put(bdev, BDEV_RAW);
587                 }       
588                 if (raid[device].sectors)
589                         ataraid_release_device(device);
590         }
591 }
592
593 static int pdcraid_open(struct inode * inode, struct file * filp) 
594 {
595         MOD_INC_USE_COUNT;
596         return 0;
597 }
598 static int pdcraid_release(struct inode * inode, struct file * filp)
599 {       
600         MOD_DEC_USE_COUNT;
601         return 0;
602 }
603
604 module_init(pdcraid_init);
605 module_exit(pdcraid_exit);
606 MODULE_LICENSE("GPL");