2 * MEDLEY SOFTWARE RAID DRIVER (Silicon Image 3112 and others)
4 * Copyright (C) 2003 Thomas Horsten <thomas@horsten.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * Copyright (C) 2003 Thomas Horsten <thomas@horsten.com>
20 * All Rights Reserved.
22 * This driver uses the ATA RAID driver framework and is based on
23 * code from Arjan van de Ven's silraid.c and hptraid.c.
25 * It is a driver for the Medley software RAID, which is used by
26 * some IDE controllers, including the Silicon Image 3112 SATA
27 * controller found onboard many modern motherboards, and the
28 * CMD680 stand-alone PCI RAID controller.
30 * The author has only tested this on the Silicon Image SiI3112.
31 * If you have any luck using more than 2 drives, and/or more
32 * than one RAID set, and/or any other chip than the SiI3112,
33 * please let me know by sending me mail at the above address.
35 * Currently, only striped mode is supported for these RAIDs.
37 * You are welcome to contact me if you have any questions or
38 * suggestions for improvement.
42 * 20040310 - thomas@horsten.com
43 * Removed C99 style variable declarations that confused gcc-2.x
44 * Fixed a bug where more than one RAID set would not be detected correctly
45 * General cleanup for submission to kernel
47 * 20031012 - thomas@horsten.com
48 * Added support for BLKRRPART ioctl to re-read partition table
50 * 20030801 - thomas@horsten.com
55 #include <linux/version.h>
56 #include <linux/module.h>
57 #include <linux/init.h>
58 #include <linux/kernel.h>
59 #include <linux/sched.h>
60 #include <linux/smp_lock.h>
61 #include <linux/blkdev.h>
62 #include <linux/blkpg.h>
63 #include <linux/genhd.h>
64 #include <linux/ioctl.h>
66 #include <linux/ide.h>
67 #include <asm/uaccess.h>
72 * These options can be tuned if the need should occur.
74 * Even better, this driver could be changed to allocate the structures
/* Upper bound on member drives per set; sizes members[] and bdev[] in struct medley_array. */
77 #define MAX_DRIVES_PER_SET 8
/* Upper bound on detected sets; sizes the static raid[] table. */
78 #define MAX_MEDLEY_ARRAYS 4
81 * Set to 1 only if you are debugging the driver, or if it doesn't work
82 * the way you expect and you want to report it.
84 * This will produce lots of kernel messages, some of which might
85 * help me figure out what is going wrong.
/* Debug switch; defined as 0 here. */
87 #define DEBUGGING_MEDLEY 0
/*
 * Two alternative dprintk() definitions: the first forwards to printk(),
 * the second expands to nothing.
 * NOTE(review): the preprocessor conditional choosing between them is not
 * visible in this view; presumably it tests DEBUGGING_MEDLEY - confirm.
 */
90 #define dprintk(fmt, args...) printk(fmt, ##args)
92 #define dprintk(fmt, args...)
96 * Medley RAID metadata structure.
98 * The metadata structure is based on the ATA drive ID from the drive itself,
99 * with the RAID information in the vendor specific regions.
101 * We do not use all the fields, since we only support Striped Sets.
/*
 * NOTE(review): only part of this structure is visible in this view. Code
 * elsewhere in the file also reads vendor_id, product_id, major_ver,
 * minor_ver, chunk_size, drive_number and raid_type, so those members must
 * be declared on lines not shown here. Do not reorder or resize anything
 * without the full declaration at hand.
 */
103 struct medley_metadata {
/* Copied into medley_array.sectors by medley_add_raiddrive(). */
107 u32 total_sectors_low;
/* medley_probe_drive() rejects version 2 metadata when this is non-zero. */
108 u32 total_sectors_high;
/* All three words are compared to decide which array a drive belongs to. */
115 u16 creation_timestamp[3];
/* Copied into medley_array.drives; bounded by MAX_DRIVES_PER_SET in medley_probe_drive(). */
120 u8 drives_per_striped_set;
/* The remaining visible members are not referenced by the visible code. */
121 u8 striped_set_number;
122 u8 drives_per_mirrored_set;
123 u8 mirrored_set_number;
125 u32 rebuild_ptr_high;
128 u8 mirrored_set_state;
129 u8 reported_device_location;
137 * This struct holds the information about a Medley array
/*
 * NOTE(review): only part of this structure is visible in this view. Other
 * code in the file uses drives, chunk_size, chunk_size_log,
 * sectors_per_row, present, timestamp, sectors, registered, valid and
 * access, which must be declared on lines not shown here.
 */
139 struct medley_array {
/* Member device numbers, indexed by the drive_number from the metadata. */
151 kdev_t members[MAX_DRIVES_PER_SET];
/* Filled from get_device_lock() in medley_init(); released in medley_exit(). */
152 struct block_device *bdev[MAX_DRIVES_PER_SET];
/* Table of detected arrays; zeroed at the start of medley_init(). */
155 static struct medley_array raid[MAX_MEDLEY_ARRAYS];
158 * Here we keep the offset of the ATARAID device IDs compared to our
159 * own (this will normally be 0, unless another ATARAID driver has
160 * registered some arrays before us).
/* Set in medley_init() to the device id returned for the first registered array. */
162 static int medley_devid_offset = 0;
165 * This holds the number of detected arrays.
/* Upper bound for the array loops in medley_init()/medley_exit() and the index check in medley_open(). */
167 static int medley_arrays = 0;
170 * Wait queue for opening device (used when re-reading partition table)
/* medley_open() sleeps here while an array's valid flag is 0; the BLKRRPART ioctl wakes it. */
172 static DECLARE_WAIT_QUEUE_HEAD(medley_wait_open);
175 * The interface functions used by the ataraid framework.
/* Forward declarations of the callbacks defined further down in this file. */
177 static int medley_open(struct inode *inode, struct file *filp);
178 static int medley_release(struct inode *inode, struct file *filp);
179 static int medley_ioctl(struct inode *inode, struct file *file,
180 unsigned int cmd, unsigned long arg);
181 static int medley_make_request(request_queue_t * q, int rw,
182 struct buffer_head *bh);
/*
 * Operations table passed to ataraid_get_device() in medley_init().
 * NOTE(review): only the release and make_request initialisers are visible
 * in this view; presumably open and ioctl are set on the lines not shown -
 * confirm.
 */
184 static struct raid_device_operations medley_ops = {
186 release: medley_release,
188 make_request: medley_make_request
192 * This is the list of devices to probe.
/*
 * Devices probed by medley_init(): minors 0 and 64 on each of the IDE
 * majors IDE0_MAJOR through IDE6_MAJOR.
 * NOTE(review): medley_init() walks this table until it finds MKDEV(0, 0);
 * the terminating entry is not visible in this view - confirm it exists.
 */
194 static const kdev_t probelist[] = {
195 MKDEV(IDE0_MAJOR, 0),
196 MKDEV(IDE0_MAJOR, 64),
197 MKDEV(IDE1_MAJOR, 0),
198 MKDEV(IDE1_MAJOR, 64),
199 MKDEV(IDE2_MAJOR, 0),
200 MKDEV(IDE2_MAJOR, 64),
201 MKDEV(IDE3_MAJOR, 0),
202 MKDEV(IDE3_MAJOR, 64),
203 MKDEV(IDE4_MAJOR, 0),
204 MKDEV(IDE4_MAJOR, 64),
205 MKDEV(IDE5_MAJOR, 0),
206 MKDEV(IDE5_MAJOR, 64),
207 MKDEV(IDE6_MAJOR, 0),
208 MKDEV(IDE6_MAJOR, 64),
213 * Handler for ioctl calls to the virtual device
/*
 * ioctl handler for the virtual RAID device.
 *
 * Visible request handling: BLKGETSIZE (size in sectors), two geometry
 * queries that report a fixed 255 heads / 63 sectors layout, a group of
 * requests forwarded to blk_ioctl(), and BLKRRPART (re-read the partition
 * table).
 *
 * NOTE(review): several lines of this function (the switch statement, some
 * case labels, the error returns and some declarations) are not visible in
 * this view; the notes below describe only what the visible lines do.
 */
215 static int medley_ioctl(struct inode *inode, struct file *file,
216 unsigned int cmd, unsigned long arg)
219 unsigned long sectors;
/* devminor: the device number shifted down by SHIFT; device: index into raid[]. */
220 int devminor = (inode->i_rdev >> SHIFT) & MAJOR_MASK;
221 int device = devminor - medley_devid_offset;
224 dprintk("medley_ioctl\n");
226 minor = MINOR(inode->i_rdev) >> SHIFT;
230 case BLKGETSIZE: /* Return device size */
234 ataraid_gendisk.part[MINOR(inode->i_rdev)].nr_sects;
235 dprintk("medley_ioctl: BLKGETSIZE\n");
/* Low four minor bits set: report 'sectors'; otherwise report the whole array's size. */
236 if (MINOR(inode->i_rdev) & 15)
237 return put_user(sectors, (unsigned long *) arg);
238 return put_user(raid[minor - medley_devid_offset].sectors,
239 (unsigned long *) arg);
243 struct hd_geometry *loc =
244 (struct hd_geometry *) arg;
/*
 * NOTE(review): this indexes raid[] with 'minor' directly, while the
 * BLKGETSIZE and HDIO_GETGEO_BIG paths use minor - medley_devid_offset.
 * With a non-zero offset this would read a different (or out-of-range)
 * entry - confirm and align with the other cases.
 */
245 unsigned short bios_cyl = (unsigned short)
246 (raid[minor].sectors / 255 / 63); /* truncate */
248 dprintk("medley_ioctl: HDIO_GETGEO\n");
251 if (put_user(255, (byte *) & loc->heads))
253 if (put_user(63, (byte *) & loc->sectors))
256 (bios_cyl, (unsigned short *) &loc->cylinders))
259 ((unsigned) ataraid_gendisk.
260 part[MINOR(inode->i_rdev)].start_sect,
261 (unsigned long *) &loc->start))
266 case HDIO_GETGEO_BIG: {
267 struct hd_big_geometry *loc =
268 (struct hd_big_geometry *) arg;
270 dprintk("medley_ioctl: HDIO_GETGEO_BIG\n");
273 if (put_user(255, (byte *) & loc->heads))
275 if (put_user(63, (byte *) & loc->sectors))
/* Same 255 x 63 geometry, but the cylinder count is stored as unsigned int. */
278 (raid[minor - medley_devid_offset].sectors /
279 255 / 63, (unsigned int *) &loc->cylinders))
282 ((unsigned) ataraid_gendisk.
283 part[MINOR(inode->i_rdev)].start_sect,
284 (unsigned long *) &loc->start))
/* Requests in this group are passed straight to the generic blk_ioctl(). */
292 dprintk("medley_ioctl: BLK*\n");
293 return blk_ioctl(inode->i_rdev, cmd, arg);
295 case BLKRRPART: /* Re-read partition tables */
296 if (!capable(CAP_SYS_ADMIN))
300 if (atomic_read(&(raid[device].valid)) == 0)
/* Mark the array invalid so that medley_open() blocks during the re-read. */
303 atomic_set(&(raid[device].valid), 0);
/* Proceed only when the access count is exactly 1; otherwise restore the valid flag. */
304 if (raid[device].access != 1) {
305 atomic_set(&(raid[device].valid), 1);
/*
 * Invalidate and zero 16 part[] entries, then register the disk again.
 * NOTE(review): the entries are addressed as partition + devminor, where
 * devminor has already been shifted down by SHIFT, whereas the other
 * paths index part[] with the raw MINOR(inode->i_rdev). Confirm this is
 * the intended index.
 */
309 for (partition = 15; partition >= 0; partition--) {
310 invalidate_device(MKDEV(ATARAID_MAJOR,
311 partition + devminor), 1);
312 ataraid_gendisk.part[partition +
313 devminor].start_sect = 0;
314 ataraid_gendisk.part[partition +
315 devminor].nr_sects = 0;
317 ataraid_register_disk(device, raid[device].sectors);
/* Re-validate the array and wake anyone sleeping in medley_open(). */
318 atomic_set(&(raid[device].valid), 1);
319 wake_up(&medley_wait_open);
330 * Handler to map a request to the real device.
331 * If the request cannot be made because it spans multiple disks,
332 * we return -1, otherwise we modify the request and return 1.
/*
 * Redirect a buffer_head aimed at the virtual device to the member disk
 * that holds the requested sectors.
 *
 * The start sector is taken from bh->b_rsector plus the start_sect of the
 * addressed part[] entry. The request is first tested for crossing a chunk
 * boundary (two equivalent tests, depending on whether chunk_size_log is
 * non-zero). The member index is then (chunk number % drives) and the
 * sector on that member is (row number * chunk_size + offset inside the
 * chunk); both are written back into bh->b_rdev and bh->b_rsector.
 *
 * NOTE(review): the return statements are not visible in this view.
 * NOTE(review): both dprintk() calls use "%ul", which formats an unsigned
 * int and then prints a literal 'l'; "%u" looks intended for the u32 value.
 * NOTE(review): rsect is a u32, so rsect + b_size / 512 can wrap for
 * requests near the 32-bit sector limit - confirm this cannot occur.
 */
334 static int medley_make_request(request_queue_t * q, int rw,
335 struct buffer_head *bh)
338 u32 rsect = bh->b_rsector;
340 ((bh->b_rdev >> SHIFT) & MAJOR_MASK) - medley_devid_offset;
341 struct medley_array *r = raid + device;
343 /* Add the partition offset */
344 rsect = rsect + ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect;
346 dprintk("medley_make_request, rsect=%ul\n", rsect);
348 /* Detect if the request crosses a chunk barrier */
349 if (r->chunk_size_log) {
350 if (((rsect & (r->chunk_size - 1)) +
351 (bh->b_size / 512)) > (1 << r->chunk_size_log)) {
355 if ((rsect / r->chunk_size) !=
356 ((rsect + (bh->b_size / 512) - 1) / r->chunk_size)) {
362 * Medley arrays are simple enough, since the smallest disk decides the
363 * number of sectors used per disk. So there is no need for the cutoff
364 * magic found in other drivers like hptraid.
366 if (r->chunk_size_log) {
367 /* We save some expensive operations (1 div, 1 mul, 1 mod),
368 * if the chunk size is a power of 2, which is true in most
369 * cases (at least with my version of the RAID BIOS).
371 disk = (rsect >> r->chunk_size_log) % r->drives;
372 rsect = ((rsect / r->sectors_per_row) <<
373 r->chunk_size_log) + (rsect & (r->chunk_size -
376 disk = (rsect / r->chunk_size) % r->drives;
377 rsect = rsect / r->sectors_per_row * r->chunk_size +
378 rsect % r->chunk_size;
381 dprintk("medley_make_request :-), disk=%d, rsect=%ul\n", disk,
383 bh->b_rdev = r->members[disk];
384 bh->b_rsector = rsect;
389 * Find out which array a drive belongs to, and add it to that array.
390 * If it is not a member of a detected array, add a new array for it.
/*
 * Attach a probed drive to the array it belongs to, creating the array
 * entry if no existing one matches.
 *
 * dev: device number of the member drive.
 * md:  the drive's Medley metadata; values are copied out of it, the
 *      pointer itself is not stored by the visible code.
 *
 * NOTE(review): this function has external linkage although the visible
 * callers are in this file; 'static' may have been intended - confirm.
 * NOTE(review): some lines (loop exit, else branches, the increment of
 * medley_arrays, the early return after the "Too many RAID sets" message)
 * are not visible in this view.
 */
392 void medley_add_raiddrive(kdev_t dev, struct medley_metadata *md)
396 dprintk("Candidate drive %02x:%02x - drive %d of %d, stride %d, "
397 "sectors %ul (%d MB)\n",
398 MAJOR(dev), MINOR(dev), md->drive_number,
399 md->drives_per_striped_set, md->chunk_size,
400 md->total_sectors_low,
401 md->total_sectors_low / 1024 / 1024 / 2);
/*
 * An existing array matches when all three timestamp words, the drive
 * count and the chunk size agree and the drive's slot is still free.
 */
403 for (c = 0; c < medley_arrays; c++) {
404 if ((raid[c].timestamp[0] == md->creation_timestamp[0]) &&
405 (raid[c].timestamp[1] == md->creation_timestamp[1]) &&
406 (raid[c].timestamp[2] == md->creation_timestamp[2]) &&
407 (raid[c].drives == md->drives_per_striped_set) &&
408 (raid[c].chunk_size == md->chunk_size) &&
409 ((raid[c].present & (1 << md->drive_number)) == 0)) {
410 dprintk("Existing array %d\n", c);
/* 'present' is a bitmask with one bit per member slot. */
411 raid[c].present |= (1 << md->drive_number);
412 raid[c].members[md->drive_number] = dev;
/* c == medley_arrays here means the loop found no match: start a new array. */
416 if (c == medley_arrays) {
417 dprintk("New array %d\n", medley_arrays);
418 if (medley_arrays == MAX_MEDLEY_ARRAYS) {
419 printk(KERN_ERR "Medley RAID: "
420 "Too many RAID sets detected - you can change "
421 "the max in the driver.\n");
423 raid[c].timestamp[0] = md->creation_timestamp[0];
424 raid[c].timestamp[1] = md->creation_timestamp[1];
425 raid[c].timestamp[2] = md->creation_timestamp[2];
426 raid[c].drives = md->drives_per_striped_set;
427 raid[c].chunk_size = md->chunk_size;
/* One row is one chunk on every member drive. */
428 raid[c].sectors_per_row = md->chunk_size *
429 md->drives_per_striped_set;
431 /* Speedup if chunk size is a power of 2 */
/* For a power of two, ffs() - 1 is its log2; 0 marks "not a power of two". */
432 if (((raid[c].chunk_size - 1) &
433 (raid[c].chunk_size)) == 0) {
434 raid[c].chunk_size_log =
435 ffs(raid[c].chunk_size) - 1;
437 raid[c].chunk_size_log = 0;
439 raid[c].present = (1 << md->drive_number);
440 raid[c].members[md->drive_number] = dev;
/* Version 1 metadata: the size is read as the 32-bit word at index 27 of the block. */
441 if (md->major_ver == 1) {
442 raid[c].sectors = ((u32 *) (md))[27];
444 raid[c].sectors = md->total_sectors_low;
452 * Read the Medley metadata from a drive.
453 * Returns the bh if it was found, otherwise NULL.
/*
 * Look for Medley metadata on a drive.
 *
 * Starting at the drive's last sector (capacity - 1) and stepping back
 * 0x200 sectors at a time, up to four 512-byte blocks are read with
 * bread(). A block is accepted when its vendor/product ids equal those of
 * the drive's PCI controller and its checksum is 0, or 0xffff for
 * major_ver 1.
 *
 * NOTE(review): pcidev is set to NULL when the drive has no hwif and is
 * dereferenced in the id comparison; the guard against NULL is not visible
 * in this view - confirm it exists.
 * NOTE(review): the checksum accumulation, the release of rejected buffers
 * and the return statements are on lines not visible here.
 * NOTE(review): this function has external linkage; 'static' may have been
 * intended - confirm.
 */
455 struct buffer_head *medley_get_metadata(kdev_t dev)
457 struct buffer_head *bh = NULL;
458 struct pci_dev *pcidev;
461 struct medley_metadata *md;
463 ide_drive_t *drvinfo = ide_info_ptr(dev, 0);
464 if ((drvinfo == NULL) || drvinfo->capacity < 1) {
468 dprintk("Probing %02x:%02x\n", MAJOR(dev), MINOR(dev));
470 /* If this drive is not on a PCI controller, it is not Medley RAID.
471 * Medley matches the PCI device ID with the metadata to check if
472 * it is valid. Unfortunately it is the only reliable way to identify
474 pcidev = drvinfo->hwif ? drvinfo->hwif->pci_dev : NULL;
480 * 4 copies of the metadata exist, in the following 4 sectors:
481 * last, last-0x200, last-0x400, last-0x600.
483 * We must try each of these in order, until we find the metadata.
484 * FIXME: This does not take into account drives with 48/64-bit LBA
485 * addressing, even though the Medley RAID version 2 supports these.
487 lba = drvinfo->capacity - 1;
488 for (pos = 0; pos < 4; pos++, lba -= 0x200) {
489 bh = bread(dev, lba, 512);
491 printk(KERN_ERR "Medley RAID (%02x:%02x): "
492 "Error reading metadata (lba=%d)\n",
493 MAJOR(dev), MINOR(dev), lba);
497 /* A valid Medley RAID has the PCI vendor/device ID of its
498 * IDE controller, and the correct checksum. */
499 md = (void *) (bh->b_data);
501 if (pcidev->vendor == md->vendor_id &&
502 pcidev->device == md->product_id) {
/* The block is walked as 160 consecutive 16-bit words. */
504 u16 *p = (void *) (bh->b_data);
506 for (c = 0; c < 160; c++) {
510 ("Probing %02x:%02x csum=%d, major_ver=%d\n",
511 MAJOR(dev), MINOR(dev), checksum,
/* Accept a sum of 0xffff only for version 1 metadata; a sum of 0 is always accepted. */
513 if (((checksum == 0xffff) && (md->major_ver == 1))
514 || (checksum == 0)) {
515 dprintk("Probing %02x:%02x VALID\n",
516 MAJOR(dev), MINOR(dev));
520 /* Was not a valid superblock */
530 * Determine if this drive belongs to a Medley array.
532 static void medley_probe_drive(int major, int minor)
534 struct buffer_head *bh;
535 kdev_t dev = MKDEV(major, minor);
536 struct medley_metadata *md;
538 bh = medley_get_metadata(dev);
542 md = (void *) (bh->b_data);
544 if (md->raid_type != 0x0) {
545 printk(KERN_INFO "Medley RAID (%02x:%02x): "
546 "Sorry, this driver currently only supports "
547 "striped sets (RAID level 0).\n", major, minor);
548 } else if (md->major_ver == 2 && md->total_sectors_high != 0) {
549 printk(KERN_ERR "Medley RAID (%02x:%02x):"
550 "Sorry, the driver only supports 32 bit LBA disks "
551 "(disk too big).\n", major, minor);
552 } else if (md->major_ver > 0 && md->major_ver > 2) {
553 printk(KERN_INFO "Medley RAID (%02x:%02x): "
554 "Unsupported version (%d.%d) - this driver supports "
555 "Medley version 1.x and 2.x\n",
556 major, minor, md->major_ver, md->minor_ver);
557 } else if (md->drives_per_striped_set > MAX_DRIVES_PER_SET) {
558 printk(KERN_ERR "Medley RAID (%02x:%02x): "
559 "Striped set too large (%d drives) - please report "
560 "this (and change max in driver).\n",
561 major, minor, md->drives_per_striped_set);
562 } else if ((md->drive_number > md->drives_per_striped_set) ||
563 (md->drives_per_striped_set < 1) ||
564 (md->chunk_size < 1)) {
565 printk(KERN_ERR "Medley RAID (%02x:%02x): "
566 "Metadata appears to be corrupt.\n", major, minor);
568 /* We have a good candidate, put it in the correct array */
569 medley_add_raiddrive(dev, md);
579 * Taken from hptraid.c, this is called to prevent the device
580 * from disappearing from under us and also nullifies the (incorrect)
581 * partitions of the underlying disk.
/*
 * Obtain a block_device reference for a member disk and blank the
 * partition sizes listed for it in its gendisk.
 *
 * member: device number of the underlying disk.
 *
 * NOTE(review): several lines (declarations, the first half of the open
 * condition, a check of the get_gendisk() result, the return) are not
 * visible in this view. In particular, confirm that 'gd' is tested for
 * NULL before gd->major is read.
 * NOTE(review): 'minor' holds the raw MINOR(member) and is shifted left by
 * gd->minor_shift to index part[]; confirm that a unit number rather than
 * the raw minor is not what was intended.
 * NOTE(review): this function has external linkage; 'static' may have been
 * intended - confirm.
 */
583 struct block_device *get_device_lock(kdev_t member)
585 struct block_device *bdev = bdget(member);
587 int minor = MINOR(member);
591 && blkdev_get(bdev, FMODE_READ | FMODE_WRITE, 0,
594 * This is supposed to prevent others from
595 * stealing our underlying disks. Now blank
596 * the /proc/partitions table for the wrong
597 * partition table, so that scripts don't
598 * accidentally mount it and crash the kernel
600 /* XXX: the 0 is an utter hack --hch */
601 gd = get_gendisk(MKDEV(MAJOR(member), 0));
603 if (gd->major == MAJOR(member)) {
/* Start at 1 + (minor << minor_shift), so the first entry of the range is left alone. */
604 for (j = 1 + (minor << gd->minor_shift);
605 j < ((minor + 1) << gd->minor_shift);
607 gd->part[j].nr_sects = 0;
615 * Initialise the driver.
617 static __init int medley_init(void)
621 memset(raid, 0, MAX_MEDLEY_ARRAYS * sizeof(struct medley_array));
623 /* Probe each of the drives on our list */
624 for (c = 0; probelist[c] != MKDEV(0, 0); c++) {
625 medley_probe_drive(MAJOR(probelist[c]),
626 MINOR(probelist[c]));
629 /* Check if the detected sets are complete */
630 for (c = 0; c < medley_arrays; c++) {
631 if (raid[c].present != (1 << raid[c].drives) - 1) {
632 printk(KERN_ERR "Medley RAID: "
633 "Incomplete RAID set deleted - disks:");
634 for (d = 0; c < raid[c].drives; c++) {
635 if (raid[c].present & (1 << d)) {
637 MAJOR(raid[c].members[d]),
638 MINOR(raid[c].members[d]));
642 if (c + 1 < medley_arrays) {
643 memmove(raid + c + 1, raid + c,
645 1) * sizeof(struct medley_array));
651 /* Register any remaining array(s) */
652 for (c = 0; c < medley_arrays; c++) {
653 int device = ataraid_get_device(&medley_ops);
655 printk(KERN_ERR "Medley RAID: "
656 "Could not get ATARAID device.\n");
660 /* First array, compute offset to our device ID's */
661 medley_devid_offset = device;
662 dprintk("Medley_devid_offset: %d\n",
663 medley_devid_offset);
664 } else if (device - medley_devid_offset != c) {
665 printk(KERN_ERR "Medley RAID: "
666 "ATARAID gave us an illegal device ID.\n");
667 ataraid_release_device(device);
671 printk(KERN_INFO "Medley RAID: "
672 "Striped set %d consists of %d disks, total %dMiB "
675 raid[c].sectors / 1024 / 1024 / 2);
676 for (d = 0; d < raid[c].drives; d++) {
677 printk(" %02x:%02x", MAJOR(raid[c].members[d]),
678 MINOR(raid[c].members[d]));
679 raid[c].bdev[d] = get_device_lock(raid[c].members[d]);
682 raid[c].registered = 1;
683 atomic_set(&(raid[c].valid), 1);
684 ataraid_register_disk(c, raid[c].sectors);
687 if (medley_arrays > 0) {
688 printk(KERN_INFO "Medley RAID: %d active RAID set%s\n",
689 medley_arrays, medley_arrays == 1 ? "" : "s");
693 printk(KERN_INFO "Medley RAID: No usable RAID sets found\n");
698 * Remove the arrays and clean up.
/*
 * Module unload: for every detected array, drop the block_device
 * references taken for its members and release its ATARAID device.
 */
700 static void __exit medley_exit(void)
703 for (device = 0; device < medley_arrays; device++) {
704 for (d = 0; d < raid[device].drives; d++) {
/* bdev[d] is only put when non-NULL, then cleared. */
705 if (raid[device].bdev[d]) {
706 blkdev_put(raid[device].bdev[d], BDEV_RAW);
707 raid[device].bdev[d] = NULL;
/* The ATARAID id is the raid[] index plus the offset recorded in medley_init(). */
710 if (raid[device].registered) {
711 ataraid_release_device(device +
712 medley_devid_offset);
713 raid[device].registered = 0;
719 * Called to open the virtual device
/*
 * Open handler: waits until the array is marked valid, then counts the
 * opener in raid[device].access.
 *
 * NOTE(review): only an upper bound (device < medley_arrays) is checked in
 * the visible lines; confirm a negative index cannot reach raid[device].
 * NOTE(review): the remainder of the subtraction and the return statements
 * are on lines not visible in this view.
 */
721 static int medley_open(struct inode *inode, struct file *filp)
723 int device = ((inode->i_rdev >> SHIFT) & MAJOR_MASK) -
725 dprintk("medley_open\n");
727 if (device < medley_arrays) {
/* valid is cleared by the BLKRRPART ioctl while the partition table is re-read. */
728 while (!atomic_read(&(raid[device].valid)))
729 sleep_on(&medley_wait_open);
730 raid[device].access++;
738 * Called to release the handle on the virtual device
/*
 * Release handler: drops one opener from raid[device].access.
 *
 * NOTE(review): unlike medley_open(), no range check on 'device' is
 * visible here - confirm the index is always valid at this point.
 */
740 static int medley_release(struct inode *inode, struct file *filp)
742 int device = ((inode->i_rdev >> SHIFT) & MAJOR_MASK) -
744 dprintk("medley_release\n");
745 raid[device].access--;
/* Module entry and exit points, and the licence declared to the kernel. */
750 module_init(medley_init);
751 module_exit(medley_exit);
752 MODULE_LICENSE("GPL");