2 * multipath.c : Multiple Devices driver for Linux
4 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
6 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
8 * MULTIPATH management functions.
10 * derived from raid1.c.
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2, or (at your option)
17 * You should have received a copy of the GNU General Public License
18 * (for example /usr/src/linux/COPYING); if not, write to the Free
19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include <linux/module.h>
23 #include <linux/slab.h>
24 #include <linux/raid/multipath.h>
25 #include <asm/atomic.h>
27 #define MAJOR_NR MD_MAJOR
29 #define MD_PERSONALITY
31 #define MAX_WORK_PER_DISK 128
33 #define NR_RESERVED_BUFS 32
37 * The following can be used to debug the driver
39 #define MULTIPATH_DEBUG 0
42 #define PRINTK(x...) printk(x)
46 #define PRINTK(x...) do { } while (0)
/* Personality descriptor registered with the MD core (defined at bottom of file). */
50 static mdk_personality_t multipath_personality;
/*
 * Global singly-linked list of failed requests awaiting retry, protected by
 * retry_list_lock and drained by the multipathd kernel thread.  The tail
 * pointer allows O(1) append in multipath_reschedule_retry().
 */
51 static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
52 struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
/* Forward declaration: spare-activation / hot-add / hot-remove state machine. */
54 static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state);
/*
 * Allocate a multipath_bh request tracker.  Prefers the pre-allocated
 * reserve pool (conf->freer1); falls back to kmalloc, and if that fails
 * blocks on conf->wait_buffer until the reserve pool refills past half
 * of NR_RESERVED_BUFS.  May sleep; must not be called from IRQ context.
 * NOTE(review): several lines of this function are missing from this
 * excerpt — comments describe only what is visible here.
 */
58 static struct multipath_bh *multipath_alloc_mpbh(multipath_conf_t *conf)
60 struct multipath_bh *mp_bh = NULL;
/* Fast path: grab a buffer from the reserve pool under the device lock. */
63 md_spin_lock_irq(&conf->device_lock);
64 if (!conf->freer1_blocked && conf->freer1) {
66 conf->freer1 = mp_bh->next_mp;
68 mp_bh->next_mp = NULL;
/* Remember it came from the pool so multipath_free_mpbh() returns it there. */
69 mp_bh->state = (1 << MPBH_PreAlloc);
70 mp_bh->bh_req.b_state = 0;
72 md_spin_unlock_irq(&conf->device_lock);
/* Slow path: try a fresh allocation outside the pool. */
75 mp_bh = (struct multipath_bh *) kmalloc(sizeof(struct multipath_bh),
78 memset(mp_bh, 0, sizeof(*mp_bh));
/*
 * Out of memory: block further pool consumers and wait for the
 * reserve pool to drain back above half capacity.
 */
81 conf->freer1_blocked = 1;
82 wait_disk_event(conf->wait_buffer,
83 !conf->freer1_blocked ||
84 conf->freer1_cnt > NR_RESERVED_BUFS/2
86 conf->freer1_blocked = 0;
/*
 * Release a multipath_bh.  Pre-allocated buffers (MPBH_PreAlloc set) go
 * back onto the conf->freer1 reserve pool and wake any allocator waiting
 * in multipath_alloc_mpbh(); IRQ-safe locking is used since completion
 * can run in interrupt context.
 */
90 static inline void multipath_free_mpbh(struct multipath_bh *mp_bh)
92 multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
94 if (test_bit(MPBH_PreAlloc, &mp_bh->state)) {
96 spin_lock_irqsave(&conf->device_lock, flags);
/* Push back onto the head of the reserve pool. */
97 mp_bh->next_mp = conf->freer1;
100 spin_unlock_irqrestore(&conf->device_lock, flags);
101 wake_up(&conf->wait_buffer);
/*
 * Grow the reserve pool by up to 'cnt' pre-allocated multipath_bh
 * buffers.  Each new buffer is tagged MPBH_PreAlloc and handed to
 * multipath_free_mpbh(), which threads it onto the pool.  Returns the
 * number of buffers actually allocated (may be < cnt on kmalloc failure).
 */
107 static int multipath_grow_mpbh (multipath_conf_t *conf, int cnt)
112 struct multipath_bh *mp_bh;
113 mp_bh = (struct multipath_bh*)kmalloc(sizeof(*mp_bh), GFP_KERNEL);
116 memset(mp_bh, 0, sizeof(*mp_bh));
117 set_bit(MPBH_PreAlloc, &mp_bh->state);
118 mp_bh->mddev = conf->mddev;
/* Freeing a PreAlloc buffer inserts it into the reserve pool. */
120 multipath_free_mpbh(mp_bh);
/*
 * Tear down the reserve pool, kfree-ing every pre-allocated buffer.
 * Called on array stop and on failed startup cleanup.
 */
126 static void multipath_shrink_mpbh(multipath_conf_t *conf)
128 md_spin_lock_irq(&conf->device_lock);
129 while (conf->freer1) {
130 struct multipath_bh *mp_bh = conf->freer1;
131 conf->freer1 = mp_bh->next_mp;
135 md_spin_unlock_irq(&conf->device_lock);
/*
 * Map an IO to the first operational path: scan the multipath table and
 * store that path's device in *rdev.  Logs an error when no operational
 * path remains.  (No load balancing yet — see comment below.)
 */
139 static int multipath_map (mddev_t *mddev, kdev_t *rdev)
141 multipath_conf_t *conf = mddev_to_conf(mddev);
142 int i, disks = MD_SB_DISKS;
145 * Later we do read balancing on the read side
146 * now we use the first available disk.
149 for (i = 0; i < disks; i++) {
150 if (conf->multipaths[i].operational) {
151 *rdev = conf->multipaths[i].dev;
/* Reached only when every path has failed. */
156 printk (KERN_ERR "multipath_map(): no more operational IO paths?\n");
/*
 * Queue a failed request onto the global retry list (O(1) tail append)
 * and kick the multipathd thread to re-issue it on another path.
 * IRQ-safe: may be called from the IO completion path.
 */
160 static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
163 mddev_t *mddev = mp_bh->mddev;
164 multipath_conf_t *conf = mddev_to_conf(mddev);
166 md_spin_lock_irqsave(&retry_list_lock, flags);
/* Empty list: tail must point at the list head before appending. */
167 if (multipath_retry_list == NULL)
168 multipath_retry_tail = &multipath_retry_list;
169 *multipath_retry_tail = mp_bh;
170 multipath_retry_tail = &mp_bh->next_mp;
171 mp_bh->next_mp = NULL;
172 md_spin_unlock_irqrestore(&retry_list_lock, flags);
173 md_wakeup_thread(conf->thread);
178 * multipath_end_bh_io() is called when we have finished servicing a multipathed
179 * operation and are ready to return a success/failure code to the buffer
182 static void multipath_end_bh_io (struct multipath_bh *mp_bh, int uptodate)
184 struct buffer_head *bh = mp_bh->master_bh;
/* Complete the caller's master buffer_head, then recycle our tracker. */
186 bh->b_end_io(bh, uptodate);
187 multipath_free_mpbh(mp_bh);
/*
 * Per-path IO completion handler (installed as b_end_io on the shadow
 * buffer_head).  On success, completes the master request; on failure,
 * marks the path faulty via md_error() and reschedules the request for
 * retry on another path.  Runs in interrupt context.
 */
190 void multipath_end_request (struct buffer_head *bh, int uptodate)
192 struct multipath_bh * mp_bh = (struct multipath_bh *)(bh->b_private);
195 * this branch is our 'one multipath IO has finished' event handler:
/* Failed IO: flag this path as bad at the MD level. */
198 md_error (mp_bh->mddev, bh->b_dev);
201 * Set MPBH_Uptodate in our master buffer_head, so that
202 * we will return a good error code for to the higher
203 * levels even if IO on some other multipathed buffer fails.
205 * The 'master' represents the complex operation to
206 * user-side. So if something waits for IO, then it will
207 * wait for the 'master' buffer_head.
209 set_bit (MPBH_Uptodate, &mp_bh->state);
213 multipath_end_bh_io(mp_bh, uptodate);
/* Error path: hand the request to multipathd for retry on another path. */
219 printk(KERN_ERR "multipath: %s: rescheduling block %lu\n",
220 partition_name(bh->b_dev), bh->b_blocknr);
221 multipath_reschedule_retry(mp_bh);
226 * This routine returns the disk from which the requested read should
/* Despite the name, no balancing: picks the first operational path. */
230 static int multipath_read_balance (multipath_conf_t *conf)
234 for (disk = 0; disk < conf->raid_disks; disk++)
235 if (conf->multipaths[disk].operational)
/*
 * Entry point for IO to the multipath array.  Clones the incoming
 * buffer_head into a shadow bh aimed at one operational path (chosen by
 * multipath_read_balance) and submits it via generic_make_request().
 * Completion is routed back through multipath_end_request().
 */
241 static int multipath_make_request (mddev_t *mddev, int rw,
242 struct buffer_head * bh)
244 multipath_conf_t *conf = mddev_to_conf(mddev);
245 struct buffer_head *bh_req;
246 struct multipath_bh * mp_bh;
247 struct multipath_info *multipath;
/* Sanity: MD expects the buffer locked by the caller. */
249 if (!buffer_locked(bh))
253 * make_request() can abort the operation when READA is being
254 * used and no empty request is available.
256 * Currently, just replace the command with READ/WRITE.
/* May sleep waiting for a free multipath_bh. */
261 mp_bh = multipath_alloc_mpbh (conf);
263 mp_bh->master_bh = bh;
264 mp_bh->mddev = mddev;
268 * read balancing logic:
270 multipath = conf->multipaths + multipath_read_balance(conf);
/* Build the shadow request: copy the master bh, retarget device/sector. */
272 bh_req = &mp_bh->bh_req;
273 memcpy(bh_req, bh, sizeof(*bh));
274 bh_req->b_blocknr = bh->b_rsector;
275 bh_req->b_dev = multipath->dev;
276 bh_req->b_rdev = multipath->dev;
277 /* bh_req->b_rsector = bh->n_rsector; */
278 bh_req->b_end_io = multipath_end_request;
279 bh_req->b_private = mp_bh;
280 generic_make_request (rw, bh_req);
/*
 * /proc/mdstat status line: "[total/working] [UU_...]" where 'U' marks an
 * operational path and '_' a failed one.
 */
284 static void multipath_status (struct seq_file *seq, mddev_t *mddev)
286 multipath_conf_t *conf = mddev_to_conf(mddev);
289 seq_printf (seq, " [%d/%d] [", conf->raid_disks,
290 conf->working_disks);
291 for (i = 0; i < conf->raid_disks; i++)
292 seq_printf (seq, "%s",
293 conf->multipaths[i].operational ? "U" : "_");
294 seq_printf (seq, "]");
/* Message templates for path-failure reporting (undefined after use). */
297 #define LAST_DISK KERN_ALERT \
298 "multipath: only one IO path left and IO error.\n"
300 #define NO_SPARE_DISK KERN_ALERT \
301 "multipath: no spare IO path left!\n"
303 #define DISK_FAILED KERN_ALERT \
304 "multipath: IO failure on %s, disabling IO path. \n" \
305 " Operation continuing on %d IO paths.\n"
/*
 * Mark path 'failed' as dead: clear its operational flag, update the
 * superblock descriptors (faulty / nonsync / inactive), decrement the
 * working-path count and wake the md thread to write out the superblock.
 */
307 static void mark_disk_bad (mddev_t *mddev, int failed)
309 multipath_conf_t *conf = mddev_to_conf(mddev);
310 struct multipath_info *multipath = conf->multipaths+failed;
311 mdp_super_t *sb = mddev->sb;
313 multipath->operational = 0;
314 mark_disk_faulty(sb->disks+multipath->number);
315 mark_disk_nonsync(sb->disks+multipath->number);
316 mark_disk_inactive(sb->disks+multipath->number);
321 md_wakeup_thread(conf->thread);
322 conf->working_disks--;
323 printk (DISK_FAILED, partition_name (multipath->dev),
324 conf->working_disks);
328 * Careful, this can execute in IRQ contexts as well!
/*
 * MD error handler for this personality: called when IO on 'dev' fails.
 * If other working paths remain, the failing path is marked bad; if this
 * was the last path, an attempt is made to activate a spare.
 * NOTE(review): parts of this function are missing from this excerpt —
 * comments describe only the visible control flow.
 */
330 static int multipath_error (mddev_t *mddev, kdev_t dev)
332 multipath_conf_t *conf = mddev_to_conf(mddev);
333 struct multipath_info * multipaths = conf->multipaths;
334 int disks = MD_SB_DISKS;
/* Last working path: look for a spare before giving up. */
338 if (conf->working_disks == 1) {
340 for (i = 0; i < disks; i++) {
341 if (multipaths[i].spare) {
350 * Uh oh, we can do nothing if this is our last path, but
351 * first check if this is a queued request for a device
352 * which has just failed.
354 for (i = 0; i < disks; i++) {
355 if (multipaths[i].dev==dev && !multipaths[i].operational)
361 * Mark disk as unusable
363 for (i = 0; i < disks; i++) {
364 if (multipaths[i].dev==dev && multipaths[i].operational) {
365 mark_disk_bad(mddev, i);
/* No working paths left: try to bring a spare online via diskop. */
369 if (!conf->working_disks) {
372 mdp_super_t *sb = mddev->sb;
374 spare = get_spare(mddev);
376 err = multipath_diskop(mddev, &spare, DISKOP_SPARE_WRITE);
377 printk("got DISKOP_SPARE_WRITE err: %d. (spare_faulty(): %d)\n", err, disk_faulty(spare));
379 if (!err && !disk_faulty(spare)) {
380 multipath_diskop(mddev, &spare, DISKOP_SPARE_ACTIVE);
381 mark_disk_sync(spare);
382 mark_disk_active(spare);
/*
 * Debug dump of the multipath configuration: counts plus one line per
 * slot that is in use (spare/operational/numbered/etc.).  NULL-safe.
 */
396 static void print_multipath_conf (multipath_conf_t *conf)
399 struct multipath_info *tmp;
401 printk("MULTIPATH conf printout:\n");
403 printk("(conf==NULL)\n");
406 printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks,
407 conf->raid_disks, conf->nr_disks);
409 for (i = 0; i < MD_SB_DISKS; i++) {
410 tmp = conf->multipaths + i;
/* Only print slots that carry any state, to keep the dump short. */
411 if (tmp->spare || tmp->operational || tmp->number ||
412 tmp->raid_disk || tmp->used_slot)
413 printk(" disk%d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
414 i, tmp->spare,tmp->operational,
415 tmp->number,tmp->raid_disk,tmp->used_slot,
416 partition_name(tmp->dev));
/*
 * Disk-operation state machine invoked by the MD core: activates,
 * write-enables or deactivates spares and hot-adds/removes paths.
 * 'state' selects the DISKOP_* operation; '*d' is the target disk
 * descriptor (may be updated for SPARE_ACTIVE).  Runs with
 * conf->device_lock held across the whole operation.
 * NOTE(review): this excerpt is missing many lines (both switch
 * statements' scaffolding and several branches); comments annotate only
 * the visible code.
 */
420 static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
423 int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
424 multipath_conf_t *conf = mddev->private;
425 struct multipath_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
426 mdp_super_t *sb = mddev->sb;
427 mdp_disk_t *failed_desc, *spare_desc, *added_desc;
428 mdk_rdev_t *spare_rdev, *failed_rdev;
430 print_multipath_conf(conf);
431 md_spin_lock_irq(&conf->device_lock);
/* --- first pass: locate the slot(s) the requested operation targets --- */
437 case DISKOP_SPARE_ACTIVE:
440 * Find the failed disk within the MULTIPATH configuration ...
441 * (this can only be in the first conf->working_disks part)
443 for (i = 0; i < conf->raid_disks; i++) {
444 tmp = conf->multipaths + i;
445 if ((!tmp->operational && !tmp->spare) ||
452 * When we activate a spare disk we _must_ have a disk in
453 * the lower (active) part of the array to replace.
455 if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
462 case DISKOP_SPARE_WRITE:
463 case DISKOP_SPARE_INACTIVE:
466 * Find the spare disk ... (can only be in the 'high'
469 for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
470 tmp = conf->multipaths + i;
471 if (tmp->spare && tmp->number == (*d)->number) {
476 if (spare_disk == -1) {
483 case DISKOP_HOT_REMOVE_DISK:
/* Hot-remove: the slot must exist and must no longer be operational. */
485 for (i = 0; i < MD_SB_DISKS; i++) {
486 tmp = conf->multipaths + i;
487 if (tmp->used_slot && (tmp->number == (*d)->number)) {
488 if (tmp->operational) {
489 printk(KERN_ERR "hot-remove-disk, slot %d is identified to be the requested disk (number %d), but is still operational!\n", i, (*d)->number);
497 if (removed_disk == -1) {
504 case DISKOP_HOT_ADD_DISK:
/* Hot-add: find a free slot in the spare ('high') area. */
506 for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
507 tmp = conf->multipaths + i;
508 if (!tmp->used_slot) {
513 if (added_disk == -1) {
/* --- second pass: perform the selected operation --- */
523 * Switch the spare disk to write-only mode:
525 case DISKOP_SPARE_WRITE:
526 sdisk = conf->multipaths + spare_disk;
527 sdisk->operational = 1;
530 * Deactivate a spare disk:
532 case DISKOP_SPARE_INACTIVE:
533 sdisk = conf->multipaths + spare_disk;
534 sdisk->operational = 0;
537 * Activate (mark read-write) the (now sync) spare disk,
538 * which means we switch it's 'raid position' (->raid_disk)
539 * with the failed disk. (only the first 'conf->nr_disks'
540 * slots are used for 'real' disks and we must preserve this
543 case DISKOP_SPARE_ACTIVE:
544 sdisk = conf->multipaths + spare_disk;
545 fdisk = conf->multipaths + failed_disk;
547 spare_desc = &sb->disks[sdisk->number];
548 failed_desc = &sb->disks[fdisk->number];
/* Consistency checks: descriptor, raid_disk and slot index must agree. */
550 if (spare_desc != *d) {
556 if (spare_desc->raid_disk != sdisk->raid_disk) {
562 if (sdisk->raid_disk != spare_disk) {
568 if (failed_desc->raid_disk != fdisk->raid_disk) {
574 if (fdisk->raid_disk != failed_disk) {
581 * do the switch finally
583 spare_rdev = find_rdev_nr(mddev, spare_desc->number);
584 failed_rdev = find_rdev_nr(mddev, failed_desc->number);
585 xchg_values(spare_rdev->desc_nr, failed_rdev->desc_nr);
586 spare_rdev->alias_device = 0;
587 failed_rdev->alias_device = 1;
589 xchg_values(*spare_desc, *failed_desc);
590 xchg_values(*fdisk, *sdisk);
593 * (careful, 'failed' and 'spare' are switched from now on)
595 * we want to preserve linear numbering and we want to
596 * give the proper raid_disk number to the now activated
597 * disk. (this means we switch back these values)
600 xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
601 xchg_values(sdisk->raid_disk, fdisk->raid_disk);
602 xchg_values(spare_desc->number, failed_desc->number);
603 xchg_values(sdisk->number, fdisk->number);
/* The old failed path (now in the spare slot) is freed if it has no device. */
607 if (sdisk->dev == MKDEV(0,0))
608 sdisk->used_slot = 0;
610 * this really activates the spare.
615 * if we activate a spare, we definitely replace a
616 * non-operational disk slot in the 'low' area of
620 conf->working_disks++;
624 case DISKOP_HOT_REMOVE_DISK:
625 rdisk = conf->multipaths + removed_disk;
627 if (rdisk->spare && (removed_disk < conf->raid_disks)) {
/* Clear the slot so it can be reused by a future hot-add. */
632 rdisk->dev = MKDEV(0,0);
633 rdisk->used_slot = 0;
637 case DISKOP_HOT_ADD_DISK:
638 adisk = conf->multipaths + added_disk;
641 if (added_disk != added_desc->number) {
/* Populate the free slot; new paths come in as non-operational spares. */
647 adisk->number = added_desc->number;
648 adisk->raid_disk = added_desc->raid_disk;
649 adisk->dev = MKDEV(added_desc->major,added_desc->minor);
651 adisk->operational = 0;
653 adisk->used_slot = 1;
664 md_spin_unlock_irq(&conf->device_lock);
666 print_multipath_conf(conf);
/* Message templates for the retry daemon (undefined after use). */
671 #define IO_ERROR KERN_ALERT \
672 "multipath: %s: unrecoverable IO read error for block %lu\n"
674 #define REDIRECT_SECTOR KERN_ERR \
675 "multipath: %s: redirecting sector %lu to another IO path\n"
678 * This is a kernel thread which:
680 * 1. Retries failed read operations on working multipaths.
681 * 2. Updates the raid superblock when problems encounter.
682 * 3. Performs writes following reads for array syncronising.
/*
 * Retry daemon body: drains the global retry list, remaps each failed
 * request to another operational path via multipath_map() and
 * re-submits it; if no other path exists the request is failed upward.
 */
685 static void multipathd (void *data)
687 struct multipath_bh *mp_bh;
688 struct buffer_head *bh;
695 md_spin_lock_irqsave(&retry_list_lock, flags);
696 mp_bh = multipath_retry_list;
699 multipath_retry_list = mp_bh->next_mp;
700 md_spin_unlock_irqrestore(&retry_list_lock, flags);
702 mddev = mp_bh->mddev;
/* Pick a new path; if it is the same (failed) device, give up. */
708 multipath_map (mddev, &bh->b_dev);
709 if (bh->b_dev == dev) {
710 printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
711 multipath_end_bh_io(mp_bh, 0);
713 printk (REDIRECT_SECTOR,
714 partition_name(bh->b_dev), bh->b_blocknr);
715 bh->b_rdev = bh->b_dev;
716 bh->b_rsector = bh->b_blocknr;
717 generic_make_request (mp_bh->cmd, bh);
720 md_spin_unlock_irqrestore(&retry_list_lock, flags);
723 #undef REDIRECT_SECTOR
726 * This will catch the scenario in which one of the multipaths was
727 * mounted as a normal device rather than as a part of a raid set.
729 * check_consistency is very personality-dependent, eg. RAID5 cannot
730 * do this check, it uses another method.
/*
 * Read one 4K block ('row') from every operational path and compare the
 * contents byte-for-byte against the first path read.  Returns non-zero
 * (inconsistent) when any path differs or a read fails.
 */
732 static int __check_consistency (mddev_t *mddev, int row)
734 multipath_conf_t *conf = mddev_to_conf(mddev);
735 int disks = MD_SB_DISKS;
737 struct buffer_head *bh = NULL;
741 for (i = 0; i < disks; i++) {
742 if (!conf->multipaths[i].operational)
744 printk("(checking disk %d)\n",i);
745 dev = conf->multipaths[i].dev;
746 set_blocksize(dev, 4096);
747 if ((bh = bread(dev, row / 4, 4096)) == NULL)
/* First path: stash its data as the reference copy. */
750 buffer = (char *) __get_free_page(GFP_KERNEL);
753 memcpy(buffer, bh->b_data, 4096);
754 } else if (memcmp(buffer, bh->b_data, 4096)) {
/* Drop any cached blocks so later reads hit the real device. */
760 invalidate_buffers(dev);
764 free_page((unsigned long) buffer);
769 invalidate_buffers(dev);
/*
 * Wrapper: check consistency of block 0 only.  A full-array check is
 * deliberately not done (see comment below).
 */
774 static int check_consistency (mddev_t *mddev)
776 if (__check_consistency(mddev, 0))
778 * we do not do this currently, as it's perfectly possible to
779 * have an inconsistent array when it's freshly created. Only
780 * newly written data has to be consistent.
/* Startup message templates (all #undef'd after multipath_run). */
787 #define INVALID_LEVEL KERN_WARNING \
788 "multipath: md%d: raid level not set to multipath IO (%d)\n"
790 #define NO_SB KERN_ERR \
791 "multipath: disabled IO path %s (couldn't access raid superblock)\n"
793 #define ERRORS KERN_ERR \
794 "multipath: disabled IO path %s (errors detected)\n"
796 #define NOT_IN_SYNC KERN_ERR \
797 "multipath: making IO path %s a spare path (not in sync)\n"
799 #define INCONSISTENT KERN_ERR \
800 "multipath: disabled IO path %s (inconsistent descriptor)\n"
802 #define ALREADY_RUNNING KERN_ERR \
803 "multipath: disabled IO path %s (multipath %d already operational)\n"
805 #define OPERATIONAL KERN_INFO \
806 "multipath: device %s operational as IO path %d\n"
808 #define MEM_ERROR KERN_ERR \
809 "multipath: couldn't allocate memory for md%d\n"
811 #define SPARE KERN_INFO \
812 "multipath: spare IO path %s\n"
814 #define NONE_OPERATIONAL KERN_ERR \
815 "multipath: no operational IO paths for md%d\n"
817 #define SB_DIFFERENCES KERN_ERR \
818 "multipath: detected IO path differences!\n"
820 #define ARRAY_IS_ACTIVE KERN_INFO \
821 "multipath: array md%d active with %d out of %d IO paths (%d spare IO paths)\n"
823 #define THREAD_ERROR KERN_ERR \
824 "multipath: couldn't allocate thread for md%d\n"
/*
 * Array start: validate the superblock level, build the private
 * multipath_conf_t from the verified rdevs, pick one active path (the
 * rest become spares), pre-allocate the buffer pool, optionally run a
 * consistency check and start the multipathd retry thread.
 * NOTE(review): several lines of this function are missing from this
 * excerpt — comments annotate only the visible code.
 */
826 static int multipath_run (mddev_t *mddev)
828 multipath_conf_t *conf;
830 struct multipath_info *disk, *disk2;
831 mdp_super_t *sb = mddev->sb;
832 mdp_disk_t *desc, *desc2;
833 mdk_rdev_t *rdev, *def_rdev = NULL;
834 struct md_list_head *tmp;
/* MULTIPATH is stored as raid level -4 in the MD superblock. */
839 if (sb->level != -4) {
840 printk(INVALID_LEVEL, mdidx(mddev), sb->level);
844 * copy the already verified devices into our private MULTIPATH
845 * bookkeeping area. [whatever we allocate in multipath_run(),
846 * should be freed in multipath_stop()]
849 conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
850 mddev->private = conf;
852 printk(MEM_ERROR, mdidx(mddev));
855 memset(conf, 0, sizeof(*conf));
/* Walk every rdev the MD core attached to this array. */
857 ITERATE_RDEV(mddev,rdev,tmp) {
859 /* this is a "should never happen" case and if it */
860 /* ever does happen, a continue; won't help */
861 printk(ERRORS, partition_name(rdev->dev));
864 /* this is a "should never happen" case and if it */
865 /* ever does happen, a continue; won't help */
871 if (rdev->desc_nr == -1) {
876 desc = &sb->disks[rdev->desc_nr];
877 disk_idx = desc->raid_disk;
878 disk = conf->multipaths + disk_idx;
880 if (!disk_sync(desc))
881 printk(NOT_IN_SYNC, partition_name(rdev->dev));
884 * Mark all disks as spare to start with, then pick our
885 * active disk. If we have a disk that is marked active
886 * in the sb, then use it, else use the first rdev.
888 disk->number = desc->number;
889 disk->raid_disk = desc->raid_disk;
890 disk->dev = rdev->dev;
891 disk->operational = 0;
894 mark_disk_sync(desc);
/* Only the first active-marked disk becomes the working path. */
896 if (disk_active(desc)) {
897 if(!conf->working_disks) {
898 printk(OPERATIONAL, partition_name(rdev->dev),
900 disk->operational = 1;
902 conf->working_disks++;
905 mark_disk_spare(desc);
908 mark_disk_spare(desc);
/* Remember the first rdev as the fallback active path. */
910 if(!num_rdevs++) def_rdev = rdev;
/* No disk was marked active in the sb: promote the first rdev. */
912 if(!conf->working_disks && num_rdevs) {
913 desc = &sb->disks[def_rdev->desc_nr];
914 disk = conf->multipaths + desc->raid_disk;
915 printk(OPERATIONAL, partition_name(def_rdev->dev),
917 disk->operational = 1;
919 conf->working_disks++;
920 mark_disk_active(desc);
923 * Make sure our active path is in desc spot 0
925 if(def_rdev->desc_nr != 0) {
926 rdev = find_rdev_nr(mddev, 0);
927 desc = &sb->disks[def_rdev->desc_nr];
929 disk = conf->multipaths + desc->raid_disk;
930 disk2 = conf->multipaths + desc2->raid_disk;
/* Swap descriptors/slots, then swap back number/raid_disk to keep numbering. */
931 xchg_values(*desc2,*desc);
932 xchg_values(*disk2,*disk);
933 xchg_values(desc2->number, desc->number);
934 xchg_values(disk2->number, disk->number);
935 xchg_values(desc2->raid_disk, desc->raid_disk);
936 xchg_values(disk2->raid_disk, disk->raid_disk);
938 xchg_values(def_rdev->desc_nr,rdev->desc_nr);
940 def_rdev->desc_nr = 0;
/* Multipath always presents exactly one raid disk; the rest are spares. */
943 conf->raid_disks = sb->raid_disks = sb->active_disks = 1;
944 conf->nr_disks = sb->nr_disks = sb->working_disks = num_rdevs;
945 sb->failed_disks = 0;
946 sb->spare_disks = num_rdevs - 1;
949 conf->device_lock = MD_SPIN_LOCK_UNLOCKED;
951 init_waitqueue_head(&conf->wait_buffer);
953 if (!conf->working_disks) {
954 printk(NONE_OPERATIONAL, mdidx(mddev));
959 /* pre-allocate some buffer_head structures.
960 * As a minimum, 1 mpbh and raid_disks buffer_heads
961 * would probably get us by in tight memory situations,
962 * but a few more is probably a good idea.
963 * For now, try NR_RESERVED_BUFS mpbh and
964 * NR_RESERVED_BUFS*raid_disks bufferheads
965 * This will allow at least NR_RESERVED_BUFS concurrent
966 * reads or writes even if kmalloc starts failing
968 if (multipath_grow_mpbh(conf, NR_RESERVED_BUFS) < NR_RESERVED_BUFS) {
969 printk(MEM_ERROR, mdidx(mddev));
/* A "clean" array is still sanity-checked across its paths. */
973 if ((sb->state & (1 << MD_SB_CLEAN))) {
975 * we do sanity checks even if the device says
978 if (check_consistency(mddev)) {
979 printk(SB_DIFFERENCES);
980 sb->state &= ~(1 << MD_SB_CLEAN);
985 const char * name = "multipathd";
987 conf->thread = md_register_thread(multipathd, conf, name);
989 printk(THREAD_ERROR, mdidx(mddev));
995 * Regenerate the "device is in sync with the raid set" bit for
998 for (i = 0; i < MD_SB_DISKS; i++) {
999 mark_disk_nonsync(sb->disks+i);
1000 for (j = 0; j < sb->raid_disks; j++) {
1001 if (sb->disks[i].number == conf->multipaths[j].number)
1002 mark_disk_sync(sb->disks+i);
1006 printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks,
1007 sb->raid_disks, sb->spare_disks);
1009 * Ok, everything is just fine now
/* Error path: free everything allocated above. */
1014 multipath_shrink_mpbh(conf);
1016 mddev->private = NULL;
1022 #undef INVALID_LEVEL
1027 #undef ALREADY_RUNNING
1030 #undef NONE_OPERATIONAL
1031 #undef SB_DIFFERENCES
1032 #undef ARRAY_IS_ACTIVE
/*
 * Array shutdown: stop the retry thread, free the reserve buffer pool
 * and release the private configuration.
 */
1034 static int multipath_stop (mddev_t *mddev)
1036 multipath_conf_t *conf = mddev_to_conf(mddev);
1038 md_unregister_thread(conf->thread);
1039 multipath_shrink_mpbh(conf);
1041 mddev->private = NULL;
/* MD personality operations table for MULTIPATH. */
1046 static mdk_personality_t multipath_personality=
1049 make_request: multipath_make_request,
1051 stop: multipath_stop,
1052 status: multipath_status,
1053 error_handler: multipath_error,
1054 diskop: multipath_diskop,
/* Module init: register the MULTIPATH personality with the MD core. */
1057 static int md__init multipath_init (void)
1059 return register_md_personality (MULTIPATH, &multipath_personality);
/* Module exit: unregister the MULTIPATH personality. */
1062 static void multipath_exit (void)
1064 unregister_md_personality (MULTIPATH);
1067 module_init(multipath_init);
1068 module_exit(multipath_exit);
1069 MODULE_LICENSE("GPL");