2 pdcraid.c Copyright (C) 2001 Red Hat, Inc. All rights reserved.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 You should have received a copy of the GNU General Public License
10 (for example /usr/src/linux/COPYING); if not, write to the Free
11 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
13 Authors: Arjan van de Ven <arjanv@redhat.com>
15 Based on work done by Søren Schmidt for FreeBSD
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/kernel.h>
22 #include <linux/sched.h>
23 #include <linux/smp_lock.h>
24 #include <linux/blkdev.h>
25 #include <linux/blkpg.h>
26 #include <linux/genhd.h>
27 #include <linux/ioctl.h>
29 #include <linux/ide.h>
30 #include <asm/uaccess.h>
/* Forward declarations of the driver entry points that are defined further down in this file. */
34 static int pdcraid_open(struct inode * inode, struct file * filp);
35 static int pdcraid_release(struct inode * inode, struct file * filp);
36 static int pdcraid_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
37 static int pdcraid0_make_request (request_queue_t *q, int rw, struct buffer_head * bh);
38 static int pdcraid1_make_request (request_queue_t *q, int rw, struct buffer_head * bh);
/*
 * Candidate drives examined by probedisk(), one entry per drive.
 * probedisk() reads the .major, .minor and .device members of an entry;
 * a .device value other than -1 means the drive was already claimed by
 * an array, and probedisk() stores the array index there once it claims it.
 * NOTE(review): struct disk_dev is not shown in this listing; the
 * initialisers are presumably in the order { major, minor, device } -- confirm.
 */
46 static struct disk_dev devlist[]= {
48 {IDE0_MAJOR, 64, -1 },
50 {IDE1_MAJOR, 64, -1 },
52 {IDE2_MAJOR, 64, -1 },
54 {IDE3_MAJOR, 64, -1 },
56 {IDE4_MAJOR, 64, -1 },
58 {IDE5_MAJOR, 64, -1 },
60 {IDE6_MAJOR, 64, -1 },
/*
 * Per-member-disk state.  NOTE(review): the opening line of the enclosing
 * struct is not part of this listing; these are presumably members of
 * struct pdcdisk, which is used for the disk[] array below -- confirm.
 */
/* Size of this member disk in sectors; probedisk() copies it from prom->raid.disk_secs. */
66 unsigned long sectors;
/* Block device handle obtained in probedisk() with bdget()/blkdev_get() and dropped in pdcraid_exit(). */
67 struct block_device *bdev;
/* Sector just past the most recent request routed to this disk by the RAID-1 paths; compared against new read positions. */
68 unsigned long last_pos;
/*
 * Per-array state.  NOTE(review): the opening line of the enclosing struct
 * is not part of this listing; these are presumably members of
 * struct pdcraid, the element type of raid[] -- confirm.
 */
/* Total size of the array in sectors; probedisk() copies it from prom->raid.total_secs. */
74 unsigned long sectors;
/* Member disks; probedisk() fills slot i for the drive matched at position i of the superblock disk table. */
77 struct pdcdisk disk[8];
/*
 * RAID-0 zone table written by fill_cutoff() and read by
 * pdcraid0_make_request(): cutoff[i] is a sector count for zone i and
 * cutoff_disks[i] the number of disks counted for that zone.
 */
79 unsigned long cutoff[8];
80 unsigned int cutoff_disks[8];
/*
 * Operations table for RAID-0 arrays; pdcraid_init() passes it to
 * ataraid_get_device().  Requests are remapped by pdcraid0_make_request().
 */
83 static struct raid_device_operations pdcraid0_ops = {
85 release: pdcraid_release,
87 make_request: pdcraid0_make_request
/*
 * Operations table for RAID-1 arrays; pdcraid_init() passes it to
 * ataraid_get_device().  Requests are dispatched by pdcraid1_make_request().
 */
90 static struct raid_device_operations pdcraid1_ops = {
92 release: pdcraid_release,
94 make_request: pdcraid1_make_request
/* State of every array, indexed by the device number that ataraid_get_device() hands out in pdcraid_init(). */
97 static struct pdcraid raid[16];
/*
 * ioctl entry point for the array block device.
 *
 * Visible cases: BLKGETSIZE (size in sectors of a partition or of the whole
 * array), a query that fills a struct hd_geometry (its case label is not
 * part of this listing; presumably HDIO_GETGEO -- confirm) and
 * HDIO_GETGEO_BIG.  The last visible statement passes the request on to
 * blk_ioctl().
 *
 * inode: identifies the device; inode->i_rdev supplies the minor number.
 * file:  not used in the visible lines.
 * cmd:   ioctl command.
 * arg:   user-space address that receives the result.
 *
 * Returns -EINVAL when arg is NULL in the visible cases, -EFAULT when a
 * put_user() in the geometry cases fails, otherwise the result of
 * put_user() or blk_ioctl().
 */
100 static int pdcraid_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
103 unsigned long sectors;
105 if (!inode || !inode->i_rdev)
/* Index into raid[] derived from the minor number; SHIFT is defined outside this listing. */
108 minor = MINOR(inode->i_rdev)>>SHIFT;
112 case BLKGETSIZE: /* Return device size */
113 if (!arg) return -EINVAL;
114 sectors = ataraid_gendisk.part[MINOR(inode->i_rdev)].nr_sects;
/* A non-zero value in the low four bits of the minor number: report the size recorded for that gendisk entry. */
115 if (MINOR(inode->i_rdev)&15)
116 return put_user(sectors, (unsigned long *) arg);
/* Otherwise report the total size of the array. */
117 return put_user(raid[minor].sectors , (unsigned long *) arg);
123 struct hd_geometry *loc = (struct hd_geometry *) arg;
124 unsigned short bios_cyl = raid[minor].geom.cylinders; /* truncate */
126 if (!loc) return -EINVAL;
127 if (put_user(raid[minor].geom.heads, (byte *) &loc->heads)) return -EFAULT;
128 if (put_user(raid[minor].geom.sectors, (byte *) &loc->sectors)) return -EFAULT;
129 if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
/* Start sector of the addressed gendisk entry. */
130 if (put_user((unsigned)ataraid_gendisk.part[MINOR(inode->i_rdev)].start_sect,
131 (unsigned long *) &loc->start)) return -EFAULT;
135 case HDIO_GETGEO_BIG:
137 struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
138 if (!loc) return -EINVAL;
139 if (put_user(raid[minor].geom.heads, (byte *) &loc->heads)) return -EFAULT;
140 if (put_user(raid[minor].geom.sectors, (byte *) &loc->sectors)) return -EFAULT;
/* Unlike the hd_geometry case above, the cylinder count is stored as an unsigned int without truncation. */
141 if (put_user(raid[minor].geom.cylinders, (unsigned int *) &loc->cylinders)) return -EFAULT;
142 if (put_user((unsigned)ataraid_gendisk.part[MINOR(inode->i_rdev)].start_sect,
143 (unsigned long *) &loc->start)) return -EFAULT;
148 return blk_ioctl(inode->i_rdev, cmd, arg);
/*
 * Convert a partition-relative sector number into an array-relative one
 * by adding the start sector of the partition.
 *
 * block:          sector number relative to the partition.
 * partition_off:  start sector of the partition.
 * partition_size: not used in the visible body.
 * stride:         not used in the visible body.
 *
 * Returns block + partition_off.
 */
155 static unsigned long partition_map_normal(unsigned long block, unsigned long partition_off, unsigned long partition_size, int stride)
157 return block + partition_off;
/*
 * RAID-0 request remapping.
 *
 * Takes a buffer head addressed to the array device and rewrites
 * bh->b_rdev and bh->b_rsector so that it targets one member disk:
 *   - the array is selected from bh->b_rdev,
 *   - the partition start is added to the requested sector,
 *   - the request is tested for crossing a stride boundary,
 *   - the cutoff[] / cutoff_disks[] zone table is consulted to compute the
 *     disk number within the zone and the sector on that disk.
 *
 * q, rw: not used in the visible lines.
 * bh:    buffer head to redirect; modified in place.
 *
 * NOTE(review): loop headers, branch bodies and return statements of this
 * function are not part of this listing, so the return value and the
 * action taken on a stride-crossing request are not documented here.
 */
160 static int pdcraid0_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
163 unsigned long rsect_left,rsect_accum = 0;
165 unsigned int disk=0,real_disk=0;
168 struct pdcraid *thisraid;
170 rsect = bh->b_rsector;
172 /* Ok. We need to modify this sector number to a new disk + new sector number.
173 * If there are disks of different sizes, this gets tricky.
174 * Example with 3 disks (1Gb, 4Gb and 5 GB):
175 * The first 3 Gb of the "RAID" are evenly spread over the 3 disks.
176 * Then things get interesting. The next 2Gb (RAID view) are spread across disk 2 and 3
177 * and the last 1Gb is disk 3 only.
179 * the way this is solved is like this: We have a list of "cutoff" points where everytime
180 * a disk falls out of the "higher" count, we mark the max sector. So once we pass a cutoff
181 * point, we have to divide by one less.
184 device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
185 thisraid = &raid[device];
186 if (thisraid->stride==0)
189 /* Partitions need adding of the start sector of the partition to the requested sector */
191 rsect = partition_map_normal(rsect, ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect, ataraid_gendisk.part[MINOR(bh->b_rdev)].nr_sects, thisraid->stride);
193 /* Woops we need to split the request to avoid crossing a stride barrier */
/* True when the first and the last sector of the request fall into different stride-sized chunks. */
194 if ((rsect/thisraid->stride) != ((rsect+(bh->b_size/512)-1)/thisraid->stride)) {
201 if (thisraid->cutoff_disks[i]==0)
203 if (rsect > thisraid->cutoff[i]) {
204 /* we're in the wrong area so far */
205 rsect_left -= thisraid->cutoff[i];
206 rsect_accum += thisraid->cutoff[i]/thisraid->cutoff_disks[i];
/* Chunk number, then the disk within the zone, then the resulting sector on that disk. */
208 block = rsect_left / thisraid->stride;
209 disk = block % thisraid->cutoff_disks[i];
210 block = (block / thisraid->cutoff_disks[i]) * thisraid->stride;
211 rsect = rsect_accum + (rsect_left % thisraid->stride) + block;
/* NOTE(review): the two tests below presumably translate the zone-relative disk number into a member index (real_disk); their bodies are not part of this listing -- confirm. */
217 if ((disk==0) && (thisraid->disk[i].sectors > rsect_accum)) {
221 if ((disk>0) && (thisraid->disk[i].sectors >= rsect_accum)) {
230 * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
231 * is the only IO operation happening on this bh.
233 bh->b_rdev = thisraid->disk[disk].device;
234 bh->b_rsector = rsect;
237 * Let the main block layer submit the IO and resolve recursion:
/*
 * RAID-1 write path: send a copy of the request to every member disk.
 *
 * A private bookkeeping object records the original buffer head as parent
 * and a counter initialised to the number of disks.  For each disk a fresh
 * buffer head is obtained, filled with a copy of the original, pointed at
 * that disk with the partition offset applied, and submitted with
 * generic_make_request().  Completion of each copy goes through
 * ataraid_end_request().
 *
 * q:  not used in the visible lines.
 * rw: passed unchanged to generic_make_request().
 * bh: the original request; it is copied, not modified, in the visible lines.
 *
 * NOTE(review): the handling of allocation failures and the return value
 * are not part of this listing.
 */
242 static int pdcraid1_write_request(request_queue_t *q, int rw, struct buffer_head * bh)
244 struct buffer_head *bh1;
245 struct ataraid_bh_private *private;
249 device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
250 private = ataraid_get_private();
254 private->parent = bh;
/* One outstanding copy per member disk. */
256 atomic_set(&private->count,raid[device].disks);
259 for (i = 0; i< raid[device].disks; i++) {
260 bh1=ataraid_get_bhead();
261 /* If this ever fails we're doomed */
265 /* dupe the bufferhead and update the parts that need to be different */
266 memcpy(bh1, bh, sizeof(*bh));
268 bh1->b_end_io = ataraid_end_request;
269 bh1->b_private = private;
270 bh1->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect; /* partition offset */
271 bh1->b_rdev = raid[device].disk[i].device;
273 /* update the last known head position for the drive */
/* b_size is in bytes; shifting right by 9 converts it to 512-byte sectors. */
274 raid[device].disk[i].last_pos = bh1->b_rsector+(bh1->b_size>>9);
276 generic_make_request(rw,bh1);
/*
 * RAID-1 read path: redirect the request to a single member disk.
 *
 * The partition start is added to bh->b_rsector in place.  For each disk
 * the difference between its recorded last_pos and the target sector is
 * computed; the disk with the smaller value is preferred, and ties enter a
 * read-balancing branch that involves the `previous` variable.  Finally
 * bh->b_rdev is set to the chosen disk and that disk's last_pos is moved
 * to the end of this request.
 *
 * q, rw: not used in the visible lines.
 * bh:    request to redirect; modified in place.
 *
 * NOTE(review): the lines between the distance computations and the
 * comparisons (presumably taking the absolute value), the declaration of
 * `previous` and the return statement are not part of this listing.
 */
281 static int pdcraid1_read_request (request_queue_t *q, int rw, struct buffer_head * bh)
285 int bestsofar,bestdist,i;
288 /* Reads are simple in principle. Pick a disk and go.
289 Initially I cheat by just picking the one which the last known
290 head position is closest by.
291 Later on, online/offline checking and performance needs adding */
293 device = (bh->b_rdev >> SHIFT)&MAJOR_MASK;
294 bh->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect;
/* Start with disk 0 as the candidate. */
297 bestdist = raid[device].disk[0].last_pos - bh->b_rsector;
303 for (i=1 ; i<raid[device].disks; i++) {
304 dist = raid[device].disk[i].last_pos - bh->b_rsector;
310 if (bestdist==dist) { /* it's a tie; try to do some read balancing */
311 if ((previous>bestsofar)&&(previous<=i))
313 previous = (previous + 1) % raid[device].disks;
314 } else if (bestdist>dist) {
321 bh->b_rdev = raid[device].disk[bestsofar].device;
/* Remember where this request ends on the chosen disk (b_size in bytes, >>9 gives sectors). */
322 raid[device].disk[bestsofar].last_pos = bh->b_rsector+(bh->b_size>>9);
325 * Let the main block layer submit the IO and resolve recursion:
/*
 * RAID-1 make_request hook (installed through pdcraid1_ops): hands the
 * request to pdcraid1_read_request() or pdcraid1_write_request() and
 * returns that function's result.
 * NOTE(review): the condition that selects between the two calls is not
 * part of this listing.
 */
332 static int pdcraid1_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
334 /* Read and Write are totally different cases; split them totally here */
339 return pdcraid1_read_request(q,rw,bh);
341 return pdcraid1_write_request(q,rw,bh);
/*
 * Compute the sector at which read_disk_sb() looks for the Promise
 * superblock on the drive (major, minor), using the geometry and capacity
 * reported by the IDE layer.
 *
 * major, minor: device numbers of the drive.
 *
 * NOTE(review): the actions taken when the IDE info, head count or sector
 * count is zero, and the return statement (presumably `return lba`), are
 * not part of this listing.
 */
346 static unsigned long calc_pdcblock_offset (int major,int minor)
348 unsigned long lba = 0;
350 ide_drive_t *ideinfo;
352 dev = MKDEV(major,minor);
353 ideinfo = ide_info_ptr (dev, 0);
358 /* first sector of the last cluster */
359 if (ideinfo->head==0)
361 if (ideinfo->sect==0)
363 if (ideinfo->head!=255) {
/* Round the capacity down to a multiple of head*sect, then step back by sect sectors. */
364 lba = (ideinfo->capacity / (ideinfo->head*ideinfo->sect));
365 lba = lba * (ideinfo->head*ideinfo->sect);
366 lba = lba - ideinfo->sect; }
/* Alternative computation without the rounding step (presumably the head==255 branch -- confirm). */
368 lba = ideinfo->capacity - ideinfo->sect;
/*
 * Read the 4096-byte block that contains the Promise superblock from the
 * drive (major, minor) and copy bufsize bytes of it into buffer.
 *
 * major, minor: device numbers of the drive.
 * buffer:       destination for the block data.
 * bufsize:      number of bytes copied from the block read.
 *
 * Logs "pdcraid: Error reading superblock." on the failure path.
 * NOTE(review): the return statements are not part of this listing;
 * probedisk() treats a non-zero result as failure.
 */
375 static int read_disk_sb (int major, int minor, unsigned char *buffer,int bufsize)
378 struct buffer_head *bh = NULL;
379 kdev_t dev = MKDEV(major,minor);
380 unsigned long sb_offset;
382 if (blksize_size[major]==NULL) /* device doesn't exist */
387 * Calculate the position of the superblock,
388 * it's at first sector of the last cylinder
390 sb_offset = calc_pdcblock_offset(major,minor)/8;
391 /* The /8 transforms sectors into 4Kb blocks */
/* Switch the device to 4096-byte blocks so that sb_offset can be used as the block number for bread(). */
396 set_blocksize (dev, 4096);
398 bh = bread (dev, sb_offset, 4096);
401 memcpy (buffer, bh->b_data, bufsize);
403 printk(KERN_ERR "pdcraid: Error reading superblock.\n");
/*
 * Compute the superblock checksum that probedisk() compares against
 * prom->checksum.
 *
 * ptr: start of the superblock, viewed as an array of unsigned int.
 *
 * The loop runs over 511 iterations.
 * NOTE(review): the loop body and the return statement are not part of
 * this listing; presumably the 511 words are summed -- confirm.
 */
413 static unsigned int calc_sb_csum (unsigned int* ptr)
419 for (count=0;count<511;count++)
/*
 * magic_1 value of the array currently being assembled; 0 means none seen
 * yet.  probedisk() rejects a drive whose magic_1 differs from a non-zero
 * cookie and stores the magic_1 of a drive that passes that test.
 */
425 static int cookie = 0;
/*
 * Examine one candidate drive and, if it belongs to the array being
 * assembled, record it in raid[device].
 *
 * devindex:  index into devlist[] of the drive to examine.
 * device:    index into raid[] of the array being assembled.
 * raidlevel: RAID type the superblock must declare.
 *
 * The drive is tested for: not yet claimed, superblock readable, checksum
 * matching, RAID type equal to raidlevel and magic_1 matching the current
 * cookie.  The superblock's disk table is then searched for the entry whose
 * channel and device equal the superblock's own channel and device; at that
 * position the drive is opened and the array parameters are copied from the
 * superblock.
 *
 * NOTE(review): the statements executed when a test fails (presumably
 * plain returns) are not part of this listing.
 */
427 static void __init probedisk(int devindex,int device, int raidlevel)
431 struct promise_raid_conf *prom;
432 static unsigned char block[4096];
433 struct block_device *bdev;
435 if (devlist[devindex].device!=-1) /* already assigned to another array */
438 major = devlist[devindex].major;
439 minor = devlist[devindex].minor;
441 if (read_disk_sb(major,minor,(unsigned char*)&block,sizeof(block)))
/* The configuration structure is taken from byte offset 512 of the 4096-byte block. */
444 prom = (struct promise_raid_conf*)&block[512];
446 /* the checksums must match */
447 if (prom->checksum != calc_sb_csum((unsigned int*)prom))
449 if (prom->raid.type!=raidlevel) /* different raidlevel */
452 if ((cookie!=0) && (cookie != prom->raid.magic_1)) /* different array */
455 cookie = prom->raid.magic_1;
457 /* This looks evil. But basically, we have to search for our adapternumber
458 in the arraydefinition, both of which are in the superblock */
/* The search is capped at 8 entries, the size of raid[].disk[]. */
459 for (i=0;(i<prom->raid.total_disks)&&(i<8);i++) {
460 if ( (prom->raid.disk[i].channel== prom->raid.channel) &&
461 (prom->raid.disk[i].device == prom->raid.device) ) {
/* Keep the drive open for the lifetime of the module; pdcraid_exit() releases it. */
463 bdev = bdget(MKDEV(major,minor));
464 if (bdev && blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_RAW) == 0) {
465 raid[device].disk[i].bdev = bdev;
467 raid[device].disk[i].device = MKDEV(major,minor);
468 raid[device].disk[i].sectors = prom->raid.disk_secs;
/* The superblock stores the stride as a shift count. */
469 raid[device].stride = (1<<prom->raid.raid0_shift);
470 raid[device].disks = prom->raid.total_disks;
471 raid[device].sectors = prom->raid.total_secs;
/* Heads and cylinders are stored in the superblock as count minus one. */
472 raid[device].geom.heads = prom->raid.heads+1;
473 raid[device].geom.sectors = prom->raid.sectors;
474 raid[device].geom.cylinders = prom->raid.cylinders+1;
/* Mark the drive as claimed by this array. */
475 devlist[devindex].device=device;
/*
 * Build the RAID-0 zone table (cutoff[] and cutoff_disks[]) of
 * raid[device] from the sizes of its member disks.
 *
 * device: index into raid[].
 *
 * For zone i the visible lines pick the smallest disk size that is still
 * larger than `bar`, count the disks that are at least that large, and
 * store smallest*count in cutoff[i] and the count in cutoff_disks[i].
 *
 * NOTE(review): the loop headers, the initialisation of `smallest` and
 * the updates of `bar` and `count` are not part of this listing.
 */
481 static void __init fill_cutoff(int device)
484 unsigned long smallest;
492 if ((raid[device].disk[j].sectors < smallest) && (raid[device].disk[j].sectors>bar))
493 smallest = raid[device].disk[j].sectors;
496 if (raid[device].disk[j].sectors >= smallest)
499 smallest = smallest * count;
501 raid[device].cutoff[i] = smallest;
502 raid[device].cutoff_disks[i] = count;
/*
 * Assemble one array of the given RAID level into raid[device].
 *
 * device:    index into raid[] (obtained from ataraid_get_device()).
 * raidlevel: RAID type passed on to probedisk().
 *
 * Calls probedisk() for candidate drives, registers the array size with
 * ataraid_register_disk(), and logs every member slot whose device number
 * is non-zero followed by a summary line.
 *
 * NOTE(review): the loop headers, any call to fill_cutoff() and the
 * return statements are not part of this listing; pdcraid_init() stores
 * the result in `retval`.
 */
506 static __init int pdcraid_init_one(int device,int raidlevel)
511 probedisk(i, device, raidlevel);
516 /* Initialize the gendisk structure */
518 ataraid_register_disk(device,raid[device].sectors);
523 if (raid[device].disk[i].device!=0) {
/* sectors/2048 converts 512-byte sectors to megabytes. */
524 printk(KERN_INFO "Drive %i is %li Mb (%i / %i) \n",
525 i,raid[device].disk[i].sectors/2048,MAJOR(raid[device].disk[i].device),MINOR(raid[device].disk[i].device));
530 printk(KERN_INFO "Raid%i array consists of %i drives. \n",raidlevel,count);
/*
 * Module initialisation: look for RAID-0 arrays, then for RAID-1 arrays.
 *
 * For each level a device slot is requested from the ataraid layer with
 * the matching operations table and pdcraid_init_one() is run on it;
 * ataraid_release_device() gives a slot back.  A banner is logged at
 * KERN_INFO, or a "No raid array found" message at KERN_DEBUG.
 *
 * NOTE(review): the loop structure, the conditions under which a slot is
 * released or each message is printed, and the return value are not part
 * of this listing.
 */
537 static __init int pdcraid_init(void)
539 int retval, device, count = 0;
543 device=ataraid_get_device(&pdcraid0_ops);
546 retval = pdcraid_init_one(device,0);
548 ataraid_release_device(device);
558 device=ataraid_get_device(&pdcraid1_ops);
561 retval = pdcraid_init_one(device,1);
563 ataraid_release_device(device);
571 printk(KERN_INFO "Promise Fasttrak(tm) Softwareraid driver for linux version 0.03beta\n");
574 printk(KERN_DEBUG "Promise Fasttrak(tm) Softwareraid driver 0.03beta: No raid array found\n");
/*
 * Module teardown: for each of the 16 raid[] slots, clear the stored block
 * device handles of the member disks and release them with blkdev_put(),
 * then give the slot back to the ataraid layer if the array has a non-zero
 * size.
 * NOTE(review): the inner loop header and the test guarding blkdev_put()
 * are not part of this listing.
 */
578 static void __exit pdcraid_exit (void)
581 for (device = 0; device<16; device++) {
/* Clear the stored handle before releasing it. */
583 struct block_device *bdev = raid[device].disk[i].bdev;
584 raid[device].disk[i].bdev = NULL;
586 blkdev_put(bdev, BDEV_RAW);
/* A non-zero size marks a slot that was set up by probedisk(). */
588 if (raid[device].sectors)
589 ataraid_release_device(device);
/* Open entry point of the array device.  NOTE(review): its body is not part of this listing. */
593 static int pdcraid_open(struct inode * inode, struct file * filp)
/* Release entry point, installed as `release` in both operations tables.  NOTE(review): its body is not part of this listing. */
598 static int pdcraid_release(struct inode * inode, struct file * filp)
/* Register pdcraid_init()/pdcraid_exit() as the module entry and exit points and declare the licence. */
604 module_init(pdcraid_init);
605 module_exit(pdcraid_exit);
606 MODULE_LICENSE("GPL");