X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fmd%2Fdm.c;h=6617ce4af09579285a2be1090adde8c9f61a3fb4;hb=f84e3f521e1449300e0fdc314b7b43b418a66dc3;hp=d837d37f62093d1669a5bf87d160eab34eb45aa3;hpb=5cae826e9e54a31f06b4c11b73f4af29e2ea4932;p=powerpc.git diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d837d37f62..6617ce4af0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -7,6 +7,7 @@ #include "dm.h" #include "dm-bio-list.h" +#include "dm-uevent.h" #include #include @@ -70,9 +71,22 @@ union map_info *dm_get_mapinfo(struct bio *bio) #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +/* + * Work processed by per-device workqueue. + */ +struct dm_wq_req { + enum { + DM_WQ_FLUSH_ALL, + DM_WQ_FLUSH_DEFERRED, + } type; + struct work_struct work; + struct mapped_device *md; + void *context; +}; + struct mapped_device { struct rw_semaphore io_lock; - struct semaphore suspend_lock; + struct mutex suspend_lock; spinlock_t pushback_lock; rwlock_t map_lock; atomic_t holders; @@ -94,6 +108,11 @@ struct mapped_device { struct bio_list deferred; struct bio_list pushback; + /* + * Processing queue (flush/barriers) + */ + struct workqueue_struct *wq; + /* * The current mapping. */ @@ -112,6 +131,9 @@ struct mapped_device { */ atomic_t event_nr; wait_queue_head_t eventq; + atomic_t uevent_seq; + struct list_head uevent_list; + spinlock_t uevent_lock; /* Protect access to uevent_list */ /* * freeze/thaw support require holding onto a super block @@ -143,11 +165,19 @@ static int __init local_init(void) return -ENOMEM; } + r = dm_uevent_init(); + if (r) { + kmem_cache_destroy(_tio_cache); + kmem_cache_destroy(_io_cache); + return r; + } + _major = major; r = register_blkdev(_major, _name); if (r < 0) { kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); + dm_uevent_exit(); return r; } @@ -162,13 +192,14 @@ static void local_exit(void) kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); + dm_uevent_exit(); _major = 0; DMINFO("cleaned up"); } -int (*_inits[])(void) __initdata = { +static int (*_inits[])(void) __initdata = { local_init, dm_target_init, dm_linear_init, @@ -176,7 +207,7 @@ int (*_inits[])(void) __initdata = { dm_interface_init, }; -void (*_exits[])(void) = { +static void (*_exits[])(void) = { local_exit, dm_target_exit, dm_linear_exit, @@ -659,13 +690,19 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, return clone; } -static void __clone_and_map(struct clone_info *ci) +static int __clone_and_map(struct clone_info *ci) { struct bio *clone, *bio = ci->bio; - struct dm_target *ti = dm_table_find_target(ci->map, ci->sector); - sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti); + struct dm_target *ti; + sector_t len = 0, max; struct dm_target_io *tio; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; + + max = max_io_len(ci->md, ci->sector, ti); + /* * Allocate a target io object. */ @@ -723,6 +760,9 @@ static void __clone_and_map(struct clone_info *ci) do { if (offset) { ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; + max = max_io_len(ci->md, ci->sector, ti); tio = alloc_tio(ci->md); @@ -746,20 +786,21 @@ static void __clone_and_map(struct clone_info *ci) ci->idx++; } + + return 0; } /* * Split the bio into several clones. */ -static void __split_bio(struct mapped_device *md, struct bio *bio) +static int __split_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; + int error = 0; ci.map = dm_get_table(md); - if (!ci.map) { - bio_io_error(bio); - return; - } + if (unlikely(!ci.map)) + return -EIO; ci.md = md; ci.bio = bio; @@ -773,12 +814,14 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) ci.idx = bio->bi_idx; start_io_acct(ci.io); - while (ci.sector_count) - __clone_and_map(&ci); + while (ci.sector_count && !error) + error = __clone_and_map(&ci); /* drop the extra reference count */ - dec_pending(ci.io, 0); + dec_pending(ci.io, error); dm_table_put(ci.map); + + return 0; } /*----------------------------------------------------------------- * CRUD END @@ -790,7 +833,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r; + int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; @@ -815,18 +858,11 @@ static int dm_request(struct request_queue *q, struct bio *bio) while (test_bit(DMF_BLOCK_IO, &md->flags)) { up_read(&md->io_lock); - if (bio_rw(bio) == READA) { - bio_io_error(bio); - return 0; - } - - r = queue_io(md, bio); - if (r < 0) { - bio_io_error(bio); - return 0; + if (bio_rw(bio) != READA) + r = queue_io(md, bio); - } else if (r == 0) - return 0; /* deferred successfully */ + if (r <= 0) + goto out_req; /* * We're in a while loop, because someone could suspend @@ -835,8 +871,13 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); } - __split_bio(md, bio); + r = __split_bio(md, bio); up_read(&md->io_lock); + +out_req: + if (r < 0) + bio_io_error(bio); + return 0; } @@ -959,7 +1000,7 @@ static struct mapped_device *alloc_dev(int minor) } if (!try_module_get(THIS_MODULE)) - goto bad0; + goto bad_module_get; /* get a minor number for the dev */ if (minor == DM_ANY_MINOR) @@ -967,20 +1008,23 @@ static struct mapped_device *alloc_dev(int minor) else r = specific_minor(md, minor); if (r < 0) - goto bad1; + goto bad_minor; memset(md, 0, sizeof(*md)); init_rwsem(&md->io_lock); - init_MUTEX(&md->suspend_lock); + mutex_init(&md->suspend_lock); spin_lock_init(&md->pushback_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); + atomic_set(&md->uevent_seq, 0); + INIT_LIST_HEAD(&md->uevent_list); + spin_lock_init(&md->uevent_lock); md->queue = blk_alloc_queue(GFP_KERNEL); if (!md->queue) - goto bad1_free_minor; + goto bad_queue; md->queue->queuedata = md; md->queue->backing_dev_info.congested_fn = dm_any_congested; @@ -991,11 +1035,11 @@ static struct mapped_device *alloc_dev(int minor) md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) - goto bad2; + goto bad_io_pool; md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); if (!md->tio_pool) - goto bad3; + goto bad_tio_pool; md->bs = bioset_create(16, 16); if (!md->bs) @@ -1003,7 +1047,7 @@ static struct mapped_device *alloc_dev(int minor) md->disk = alloc_disk(1); if (!md->disk) - goto bad4; + goto bad_disk; atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); @@ -1018,6 +1062,10 @@ static struct mapped_device *alloc_dev(int minor) add_disk(md->disk); format_dev_t(md->name, MKDEV(_major, minor)); + md->wq = create_singlethread_workqueue("kdmflush"); + if (!md->wq) + goto bad_thread; + /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); old_md = idr_replace(&_minor_idr, md, minor); @@ -1027,31 +1075,36 @@ static struct mapped_device *alloc_dev(int minor) return md; - bad4: +bad_thread: + put_disk(md->disk); +bad_disk: bioset_free(md->bs); - bad_no_bioset: +bad_no_bioset: mempool_destroy(md->tio_pool); - bad3: +bad_tio_pool: mempool_destroy(md->io_pool); - bad2: +bad_io_pool: blk_cleanup_queue(md->queue); - bad1_free_minor: +bad_queue: free_minor(minor); - bad1: +bad_minor: module_put(THIS_MODULE); - bad0: +bad_module_get: kfree(md); return NULL; } +static void unlock_fs(struct mapped_device *md); + static void free_dev(struct mapped_device *md) { int minor = md->disk->first_minor; if (md->suspended_bdev) { - thaw_bdev(md->suspended_bdev, NULL); + unlock_fs(md); bdput(md->suspended_bdev); } + destroy_workqueue(md->wq); mempool_destroy(md->tio_pool); mempool_destroy(md->io_pool); bioset_free(md->bs); @@ -1073,8 +1126,16 @@ static void free_dev(struct mapped_device *md) */ static void event_callback(void *context) { + unsigned long flags; + LIST_HEAD(uevents); struct mapped_device *md = (struct mapped_device *) context; + spin_lock_irqsave(&md->uevent_lock, flags); + list_splice_init(&md->uevent_list, &uevents); + spin_unlock_irqrestore(&md->uevent_lock, flags); + + dm_send_uevents(&uevents, &md->disk->dev.kobj); + atomic_inc(&md->event_nr); wake_up(&md->eventq); } @@ -1223,19 +1284,91 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); +static int dm_wait_for_completion(struct mapped_device *md) +{ + int r = 0; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + smp_mb(); + if (!atomic_read(&md->pending)) + break; + + if (signal_pending(current)) { + r = -EINTR; + break; + } + + io_schedule(); + } + set_current_state(TASK_RUNNING); + + return r; +} + /* * Process the deferred bios */ -static void __flush_deferred_io(struct mapped_device *md, struct bio *c) +static void __flush_deferred_io(struct mapped_device *md) +{ + struct bio *c; + + while ((c = bio_list_pop(&md->deferred))) { + if (__split_bio(md, c)) + bio_io_error(c); + } + + clear_bit(DMF_BLOCK_IO, &md->flags); +} + +static void __merge_pushback_list(struct mapped_device *md) { - struct bio *n; + unsigned long flags; + + spin_lock_irqsave(&md->pushback_lock, flags); + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); + bio_list_merge_head(&md->deferred, &md->pushback); + bio_list_init(&md->pushback); + spin_unlock_irqrestore(&md->pushback_lock, flags); +} + +static void dm_wq_work(struct work_struct *work) +{ + struct dm_wq_req *req = container_of(work, struct dm_wq_req, work); + struct mapped_device *md = req->md; - while (c) { - n = c->bi_next; - c->bi_next = NULL; - __split_bio(md, c); - c = n; + down_write(&md->io_lock); + switch (req->type) { + case DM_WQ_FLUSH_ALL: + __merge_pushback_list(md); + /* pass through */ + case DM_WQ_FLUSH_DEFERRED: + __flush_deferred_io(md); + break; + default: + DMERR("dm_wq_work: unrecognised work type %d", req->type); + BUG(); } + up_write(&md->io_lock); +} + +static void dm_wq_queue(struct mapped_device *md, int type, void *context, + struct dm_wq_req *req) +{ + req->type = type; + req->md = md; + req->context = context; + INIT_WORK(&req->work, dm_wq_work); + queue_work(md->wq, &req->work); +} + +static void dm_queue_flush(struct mapped_device *md, int type, void *context) +{ + struct dm_wq_req req; + + dm_wq_queue(md, type, context, &req); + flush_workqueue(md->wq); } /* @@ -1245,7 +1378,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) { int r = -EINVAL; - down(&md->suspend_lock); + mutex_lock(&md->suspend_lock); /* device must be suspended */ if (!dm_suspended(md)) @@ -1260,7 +1393,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) r = __bind(md, table); out: - up(&md->suspend_lock); + mutex_unlock(&md->suspend_lock); return r; } @@ -1309,17 +1442,17 @@ static void unlock_fs(struct mapped_device *md) int dm_suspend(struct mapped_device *md, unsigned suspend_flags) { struct dm_table *map = NULL; - unsigned long flags; DECLARE_WAITQUEUE(wait, current); - struct bio *def; - int r = -EINVAL; + int r = 0; int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; - down(&md->suspend_lock); + mutex_lock(&md->suspend_lock); - if (dm_suspended(md)) + if (dm_suspended(md)) { + r = -EINVAL; goto out_unlock; + } map = dm_get_table(md); @@ -1341,16 +1474,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) r = -ENOMEM; goto flush_and_out; } - } - /* - * Flush I/O to the device. - * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os. - */ - if (do_lockfs && !noflush) { - r = lock_fs(md); - if (r) - goto out; + /* + * Flush I/O to the device. noflush supersedes do_lockfs, + * because lock_fs() needs to flush I/Os. + */ + if (do_lockfs) { + r = lock_fs(md); + if (r) + goto out; + } } /* @@ -1367,66 +1500,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) dm_table_unplug_all(map); /* - * Then we wait for the already mapped ios to - * complete. + * Wait for the already-mapped ios to complete. */ - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!atomic_read(&md->pending) || signal_pending(current)) - break; - - io_schedule(); - } - set_current_state(TASK_RUNNING); + r = dm_wait_for_completion(md); down_write(&md->io_lock); remove_wait_queue(&md->wait, &wait); - if (noflush) { - spin_lock_irqsave(&md->pushback_lock, flags); - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - bio_list_merge_head(&md->deferred, &md->pushback); - bio_list_init(&md->pushback); - spin_unlock_irqrestore(&md->pushback_lock, flags); - } + if (noflush) + __merge_pushback_list(md); + up_write(&md->io_lock); /* were we interrupted ? */ - r = -EINTR; - if (atomic_read(&md->pending)) { - clear_bit(DMF_BLOCK_IO, &md->flags); - def = bio_list_get(&md->deferred); - __flush_deferred_io(md, def); - up_write(&md->io_lock); + if (r < 0) { + dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); + unlock_fs(md); goto out; /* pushback list is already flushed, so skip flush */ } - up_write(&md->io_lock); dm_table_postsuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); - r = 0; - flush_and_out: - if (r && noflush) { + if (r && noflush) /* * Because there may be already I/Os in the pushback list, * flush them before return. */ - down_write(&md->io_lock); - - spin_lock_irqsave(&md->pushback_lock, flags); - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - bio_list_merge_head(&md->deferred, &md->pushback); - bio_list_init(&md->pushback); - spin_unlock_irqrestore(&md->pushback_lock, flags); - - def = bio_list_get(&md->deferred); - __flush_deferred_io(md, def); - up_write(&md->io_lock); - } + dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL); out: if (r && md->suspended_bdev) { @@ -1437,17 +1540,16 @@ out: dm_table_put(map); out_unlock: - up(&md->suspend_lock); + mutex_unlock(&md->suspend_lock); return r; } int dm_resume(struct mapped_device *md) { int r = -EINVAL; - struct bio *def; struct dm_table *map = NULL; - down(&md->suspend_lock); + mutex_lock(&md->suspend_lock); if (!dm_suspended(md)) goto out; @@ -1459,12 +1561,7 @@ int dm_resume(struct mapped_device *md) if (r) goto out; - down_write(&md->io_lock); - clear_bit(DMF_BLOCK_IO, &md->flags); - - def = bio_list_get(&md->deferred); - __flush_deferred_io(md, def); - up_write(&md->io_lock); + dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); unlock_fs(md); @@ -1477,13 +1574,13 @@ int dm_resume(struct mapped_device *md) dm_table_unplug_all(map); - kobject_uevent(&md->disk->kobj, KOBJ_CHANGE); + dm_kobject_uevent(md); r = 0; out: dm_table_put(map); - up(&md->suspend_lock); + mutex_unlock(&md->suspend_lock); return r; } @@ -1491,6 +1588,16 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ +void dm_kobject_uevent(struct mapped_device *md) +{ + kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); +} + +uint32_t dm_next_uevent_seq(struct mapped_device *md) +{ + return atomic_add_return(1, &md->uevent_seq); +} + uint32_t dm_get_event_nr(struct mapped_device *md) { return atomic_read(&md->event_nr); @@ -1502,6 +1609,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr) (event_nr != atomic_read(&md->event_nr))); } +void dm_uevent_add(struct mapped_device *md, struct list_head *elist) +{ + unsigned long flags; + + spin_lock_irqsave(&md->uevent_lock, flags); + list_add(elist, &md->uevent_list); + spin_unlock_irqrestore(&md->uevent_lock, flags); +} + /* * The gendisk is only valid as long as you have a reference * count on 'md'.