* idle IO detection.
*
* you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
+ * or /sys/block/mdX/md/sync_speed_{min,max}
*/
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
+static inline int speed_min(mddev_t *mddev)
+{
+ return mddev->sync_speed_min ?
+ mddev->sync_speed_min : sysctl_speed_limit_min;
+}
+
+static inline int speed_max(mddev_t *mddev)
+{
+ return mddev->sync_speed_max ?
+ mddev->sync_speed_max : sysctl_speed_limit_max;
+}
static struct ctl_table_header *raid_table_header;
}
rdev->size = calc_dev_size(rdev, sb->chunk_size);
+ if (rdev->size < sb->size && sb->level > 1)
+ /* "this cannot possibly happen" ... */
+ ret = -EINVAL;
+
abort:
return ret;
}
rdev->size = le64_to_cpu(sb->data_size)/2;
if (le32_to_cpu(sb->chunksize))
rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+
+ if (le32_to_cpu(sb->size) > rdev->size*2)
+ return -EINVAL;
return 0;
}
MD_BUG();
return -EINVAL;
}
+ /* make sure rdev->size exceeds mddev->size */
+ if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+ if (mddev->pers)
+ /* Cannot change size, so fail */
+ return -ENOSPC;
+ else
+ mddev->size = rdev->size;
+ }
same_pdev = match_dev_unit(mddev, rdev);
if (same_pdev)
printk(KERN_WARNING
static struct rdev_sysfs_entry rdev_errors =
__ATTR(errors, 0644, errors_show, errors_store);
+static ssize_t
+slot_show(mdk_rdev_t *rdev, char *page)
+{
+ if (rdev->raid_disk < 0)
+ return sprintf(page, "none\n");
+ else
+ return sprintf(page, "%d\n", rdev->raid_disk);
+}
+
+static ssize_t
+slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ int slot = simple_strtoul(buf, &e, 10);
+ if (strncmp(buf, "none", 4)==0)
+ slot = -1;
+ else if (e==buf || (*e && *e!= '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ /* Cannot set slot in active array (yet) */
+ return -EBUSY;
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+ rdev->raid_disk = slot;
+ /* assume it is working */
+ rdev->flags = 0;
+ set_bit(In_sync, &rdev->flags);
+ return len;
+}
+
+
+static struct rdev_sysfs_entry rdev_slot =
+__ATTR(slot, 0644, slot_show, slot_store);
+
+static ssize_t
+offset_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
+}
+
+static ssize_t
+offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long long offset = simple_strtoull(buf, &e, 10);
+ if (e==buf || (*e && *e != '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ return -EBUSY;
+ rdev->data_offset = offset;
+ return len;
+}
+
+static struct rdev_sysfs_entry rdev_offset =
+__ATTR(offset, 0644, offset_show, offset_store);
+
+static ssize_t
+rdev_size_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+}
+
+static ssize_t
+rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long long size = simple_strtoull(buf, &e, 10);
+ if (e==buf || (*e && *e != '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ return -EBUSY;
+ rdev->size = size;
+ if (size < rdev->mddev->size || rdev->mddev->size == 0)
+ rdev->mddev->size = size;
+ return len;
+}
+
+static struct rdev_sysfs_entry rdev_size =
+__ATTR(size, 0644, rdev_size_show, rdev_size_store);
+
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_super.attr,
&rdev_errors.attr,
+ &rdev_slot.attr,
+ &rdev_offset.attr,
+ &rdev_size.attr,
NULL,
};
static ssize_t
static struct md_sysfs_entry md_chunk_size =
__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
+static ssize_t
+null_show(mddev_t *mddev, char *page)
+{
+ return -EINVAL;
+}
+
+static ssize_t
+new_dev_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* buf must be %d:%d\n? giving major and minor numbers */
+ /* The new device is added to the array.
+ * If the array has a persistent superblock, we read the
+ * superblock to initialise info and check validity.
+ * Otherwise, only checking done is that in bind_rdev_to_array,
+ * which mainly checks size.
+ */
+ char *e;
+ int major = simple_strtoul(buf, &e, 10);
+ int minor;
+ dev_t dev;
+ mdk_rdev_t *rdev;
+ int err;
+
+ if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
+ return -EINVAL;
+ minor = simple_strtoul(e+1, &e, 10);
+ if (*e && *e != '\n')
+ return -EINVAL;
+ dev = MKDEV(major, minor);
+ if (major != MAJOR(dev) ||
+ minor != MINOR(dev))
+ return -EOVERFLOW;
+
+
+ if (mddev->persistent) {
+ rdev = md_import_device(dev, mddev->major_version,
+ mddev->minor_version);
+ if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
+ mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
+ mdk_rdev_t, same_set);
+ err = super_types[mddev->major_version]
+ .load_super(rdev, rdev0, mddev->minor_version);
+ if (err < 0)
+ goto out;
+ }
+ } else
+ rdev = md_import_device(dev, -1, -1);
+
+ if (IS_ERR(rdev))
+ return PTR_ERR(rdev);
+ err = bind_rdev_to_array(rdev, mddev);
+ out:
+ if (err)
+ export_rdev(rdev);
+ return err ? err : len;
+}
+
+static struct md_sysfs_entry md_new_device =
+__ATTR(new_dev, 0200, null_show, new_dev_store);
static ssize_t
size_show(mddev_t *mddev, char *page)
static struct md_sysfs_entry
md_mismatches = __ATTR_RO(mismatch_cnt);
+static ssize_t
+sync_min_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d (%s)\n", speed_min(mddev),
+ mddev->sync_speed_min ? "local": "system");
+}
+
+static ssize_t
+sync_min_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int min;
+ char *e;
+ if (strncmp(buf, "system", 6)==0) {
+ mddev->sync_speed_min = 0;
+ return len;
+ }
+ min = simple_strtoul(buf, &e, 10);
+ if (buf == e || (*e && *e != '\n') || min <= 0)
+ return -EINVAL;
+ mddev->sync_speed_min = min;
+ return len;
+}
+
+static struct md_sysfs_entry md_sync_min =
+__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
+
+static ssize_t
+sync_max_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d (%s)\n", speed_max(mddev),
+ mddev->sync_speed_max ? "local": "system");
+}
+
+static ssize_t
+sync_max_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int max;
+ char *e;
+ if (strncmp(buf, "system", 6)==0) {
+ mddev->sync_speed_max = 0;
+ return len;
+ }
+ max = simple_strtoul(buf, &e, 10);
+ if (buf == e || (*e && *e != '\n') || max <= 0)
+ return -EINVAL;
+ mddev->sync_speed_max = max;
+ return len;
+}
+
+static struct md_sysfs_entry md_sync_max =
+__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
+
+
+static ssize_t
+sync_speed_show(mddev_t *mddev, char *page)
+{
+ unsigned long resync, dt, db;
+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+ dt = ((jiffies - mddev->resync_mark) / HZ);
+ if (!dt) dt++;
+ db = resync - (mddev->resync_mark_cnt);
+ return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
+}
+
+static struct md_sysfs_entry
+md_sync_speed = __ATTR_RO(sync_speed);
+
+static ssize_t
+sync_completed_show(mddev_t *mddev, char *page)
+{
+ unsigned long max_blocks, resync;
+
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+ max_blocks = mddev->resync_max_sectors;
+ else
+ max_blocks = mddev->size << 1;
+
+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+ return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+}
+
+static struct md_sysfs_entry
+md_sync_completed = __ATTR_RO(sync_completed);
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_raid_disks.attr,
&md_chunk_size.attr,
&md_size.attr,
&md_metadata.attr,
+ &md_new_device.attr,
NULL,
};
static struct attribute *md_redundancy_attrs[] = {
&md_scan_mode.attr,
&md_mismatches.attr,
+ &md_sync_min.attr,
+ &md_sync_max.attr,
+ &md_sync_speed.attr,
+ &md_sync_completed.attr,
NULL,
};
static struct attribute_group md_redundancy_group = {
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
- err = bind_rdev_to_array(rdev, mddev);
- if (err) {
- export_rdev(rdev);
- return err;
- }
-
if (!mddev->persistent) {
printk(KERN_INFO "md: nonpersistent superblock ...\n");
rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
rdev->size = calc_dev_size(rdev, mddev->chunk_size);
- if (!mddev->size || (mddev->size > rdev->size))
- mddev->size = rdev->size;
+ err = bind_rdev_to_array(rdev, mddev);
+ if (err) {
+ export_rdev(rdev);
+ return err;
+ }
}
return 0;
size = calc_dev_size(rdev, mddev->chunk_size);
rdev->size = size;
- if (size < mddev->size) {
- printk(KERN_WARNING
- "%s: disk size %llu blocks < array size %llu\n",
- mdname(mddev), (unsigned long long)size,
- (unsigned long long)mddev->size);
- err = -ENOSPC;
- goto abort_export;
- }
-
if (test_bit(Faulty, &rdev->flags)) {
printk(KERN_WARNING
"md: can not hot-add faulty %s disk to %s!\n",
}
clear_bit(In_sync, &rdev->flags);
rdev->desc_nr = -1;
- bind_rdev_to_array(rdev, mddev);
+ err = bind_rdev_to_array(rdev, mddev);
+ if (err)
+ goto abort_export;
/*
* The rest should better be atomic, we can have disk failures
bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
- down(&bdev->bd_inode->i_sem);
+ mutex_lock(&bdev->bd_inode->i_mutex);
i_size_write(bdev->bd_inode, mddev->array_size << 10);
- up(&bdev->bd_inode->i_sem);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}
}
bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
- down(&bdev->bd_inode->i_sem);
+ mutex_lock(&bdev->bd_inode->i_mutex);
i_size_write(bdev->bd_inode, mddev->array_size << 10);
- up(&bdev->bd_inode->i_sem);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}
}
return 0;
}
+static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ mddev_t *mddev = bdev->bd_disk->private_data;
+
+ geo->heads = 2;
+ geo->sectors = 4;
+ geo->cylinders = get_capacity(mddev->gendisk) / 8;
+ return 0;
+}
+
static int md_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
int err = 0;
void __user *argp = (void __user *)arg;
- struct hd_geometry __user *loc = argp;
mddev_t *mddev = NULL;
if (!capable(CAP_SYS_ADMIN))
* 4 sectors (with a BIG number of cylinders...). This drives
* dosfs just mad... ;-)
*/
- case HDIO_GETGEO:
- if (!loc) {
- err = -EINVAL;
- goto abort_unlock;
- }
- err = put_user (2, (char __user *) &loc->heads);
- if (err)
- goto abort_unlock;
- err = put_user (4, (char __user *) &loc->sectors);
- if (err)
- goto abort_unlock;
- err = put_user(get_capacity(mddev->gendisk)/8,
- (short __user *) &loc->cylinders);
- if (err)
- goto abort_unlock;
- err = put_user (get_start_sect(inode->i_bdev),
- (long __user *) &loc->start);
- goto done_unlock;
}
/*
.open = md_open,
.release = md_release,
.ioctl = md_ioctl,
+ .getgeo = md_getgeo,
.media_changed = md_media_changed,
.revalidate_disk= md_revalidate,
};
printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
- " %d KB/sec/disc.\n", sysctl_speed_limit_min);
+ " %d KB/sec/disc.\n", speed_min(mddev));
printk(KERN_INFO "md: using maximum available idle IO bandwidth "
"(but not more than %d KB/sec) for reconstruction.\n",
- sysctl_speed_limit_max);
+ speed_max(mddev));
is_mddev_idle(mddev); /* this also initializes IO event counters */
/* we don't use the checkpoint if there's a bitmap */
skipped = 0;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
- currspeed < sysctl_speed_limit_min);
+ currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out;
currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
/((jiffies-mddev->resync_mark)/HZ +1) +1;
- if (currspeed > sysctl_speed_limit_min) {
- if ((currspeed > sysctl_speed_limit_max) ||
+ if (currspeed > speed_min(mddev)) {
+ if ((currspeed > speed_max(mddev)) ||
!is_mddev_idle(mddev)) {
msleep(500);
goto repeat;