X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=block%2Fas-iosched.c;h=8da3cf66894c379b1b4908d94602c49f77cb473e;hb=2a16e3f4b0c408b9e50297d2ec27e295d490267a;hp=c6744ff382944a590767cef1827e627c1915c7ad;hpb=3a65dfe8c088143c7155cfd36a72f4b0ad2fc4b2;p=powerpc.git diff --git a/block/as-iosched.c b/block/as-iosched.c index c6744ff382..8da3cf6689 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1,10 +1,8 @@ /* - * linux/drivers/block/as-iosched.c - * * Anticipatory & deadline i/o scheduler. * * Copyright (C) 2002 Jens Axboe - * Nick Piggin + * Nick Piggin * */ #include @@ -69,7 +67,7 @@ /* Bits in as_io_context.state */ enum as_io_states { - AS_TASK_RUNNING=0, /* Process has not exitted */ + AS_TASK_RUNNING=0, /* Process has not exited */ AS_TASK_IOSTARTED, /* Process has started some IO */ AS_TASK_IORUNNING, /* Process has completed some IO */ }; @@ -102,6 +100,9 @@ struct as_data { unsigned long exit_prob; /* probability a task will exit while being waited on */ + unsigned long exit_no_coop; /* probablility an exited task will + not be part of a later cooperating + request */ unsigned long new_ttime_total; /* mean thinktime on new proc */ unsigned long new_ttime_mean; u64 new_seek_total; /* mean seek on new proc */ @@ -181,6 +182,9 @@ struct as_rq { static kmem_cache_t *arq_pool; +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +static void as_antic_stop(struct as_data *ad); + /* * IO Context helper functions */ @@ -369,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) * existing request against the same sector), which can happen when using * direct IO, then return the alias. */ -static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) { struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; struct rb_node *parent = NULL; @@ -396,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) return NULL; } +static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct as_rq *alias; + + while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { + as_move_to_dispatch(ad, alias); + as_antic_stop(ad); + } +} + static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) { if (!ON_RB(&arq->rb_node)) { @@ -636,37 +650,152 @@ static void as_antic_timeout(unsigned long data) kblockd_schedule_work(&ad->antic_work); if (aic->ttime_samples == 0) { - /* process anticipated on has exitted or timed out*/ + /* process anticipated on has exited or timed out*/ ad->exit_prob = (7*ad->exit_prob + 256)/8; } + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { + /* process not "saved" by a cooperating request */ + ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; + } } spin_unlock_irqrestore(q->queue_lock, flags); } +static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic, + unsigned long ttime) +{ + /* fixed point: 1.0 == 1<<8 */ + if (aic->ttime_samples == 0) { + ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8; + ad->new_ttime_mean = ad->new_ttime_total / 256; + + ad->exit_prob = (7*ad->exit_prob)/8; + } + aic->ttime_samples = (7*aic->ttime_samples + 256) / 8; + aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8; + aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples; +} + +static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, + sector_t sdist) +{ + u64 total; + + if (aic->seek_samples == 0) { + ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8; + ad->new_seek_mean = ad->new_seek_total / 256; + } + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc + */ + if (aic->seek_samples <= 60) /* second&third seek */ + sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024); + else + sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64); + + aic->seek_samples = (7*aic->seek_samples + 256) / 8; + aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8; + total = aic->seek_total + (aic->seek_samples/2); + do_div(total, aic->seek_samples); + aic->seek_mean = (sector_t)total; +} + +/* + * as_update_iohist keeps a decaying histogram of IO thinktimes, and + * updates @aic->ttime_mean based on that. It is called when a new + * request is queued. + */ +static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, + struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + int data_dir = arq->is_sync; + unsigned long thinktime = 0; + sector_t seek_dist; + + if (aic == NULL) + return; + + if (data_dir == REQ_SYNC) { + unsigned long in_flight = atomic_read(&aic->nr_queued) + + atomic_read(&aic->nr_dispatched); + spin_lock(&aic->lock); + if (test_bit(AS_TASK_IORUNNING, &aic->state) || + test_bit(AS_TASK_IOSTARTED, &aic->state)) { + /* Calculate read -> read thinktime */ + if (test_bit(AS_TASK_IORUNNING, &aic->state) + && in_flight == 0) { + thinktime = jiffies - aic->last_end_request; + thinktime = min(thinktime, MAX_THINKTIME-1); + } + as_update_thinktime(ad, aic, thinktime); + + /* Calculate read -> read seek distance */ + if (aic->last_request_pos < rq->sector) + seek_dist = rq->sector - aic->last_request_pos; + else + seek_dist = aic->last_request_pos - rq->sector; + as_update_seekdist(ad, aic, seek_dist); + } + aic->last_request_pos = rq->sector + rq->nr_sectors; + set_bit(AS_TASK_IOSTARTED, &aic->state); + spin_unlock(&aic->lock); + } +} + /* * as_close_req decides if one request is considered "close" to the * previous one issued. */ -static int as_close_req(struct as_data *ad, struct as_rq *arq) +static int as_close_req(struct as_data *ad, struct as_io_context *aic, + struct as_rq *arq) { unsigned long delay; /* milliseconds */ sector_t last = ad->last_sector[ad->batch_data_dir]; sector_t next = arq->request->sector; sector_t delta; /* acceptable close offset (in sectors) */ + sector_t s; if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished) delay = 0; else delay = ((jiffies - ad->antic_start) * 1000) / HZ; - if (delay <= 1) - delta = 64; + if (delay == 0) + delta = 8192; else if (delay <= 20 && delay <= ad->antic_expire) - delta = 64 << (delay-1); + delta = 8192 << delay; else return 1; - return (last - (delta>>1) <= next) && (next <= last + delta); + if ((last <= next + (delta>>1)) && (next <= last + delta)) + return 1; + + if (last < next) + s = next - last; + else + s = last - next; + + if (aic->seek_samples == 0) { + /* + * Process has just started IO. Use past statistics to + * gauge success possibility + */ + if (ad->new_seek_mean > s) { + /* this request is better than what we're expecting */ + return 1; + } + + } else { + if (aic->seek_mean > s) { + /* this request is better than what we're expecting */ + return 1; + } + } + + return 0; } /* @@ -678,7 +807,7 @@ static int as_close_req(struct as_data *ad, struct as_rq *arq) * dispatch it ASAP, because we know that application will not be submitting * any new reads. * - * If the task which has submitted the request has exitted, break anticipation. + * If the task which has submitted the request has exited, break anticipation. * * If this task has queued some other IO, do not enter enticipation. */ @@ -686,7 +815,6 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) { struct io_context *ioc; struct as_io_context *aic; - sector_t s; ioc = ad->io_context; BUG_ON(!ioc); @@ -708,13 +836,6 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) if (!aic) return 0; - if (!test_bit(AS_TASK_RUNNING, &aic->state)) { - /* process anticipated on has exitted */ - if (aic->ttime_samples == 0) - ad->exit_prob = (7*ad->exit_prob + 256)/8; - return 1; - } - if (atomic_read(&aic->nr_queued) > 0) { /* process has more requests queued */ return 1; @@ -725,57 +846,45 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) { + if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { /* * Found a close request that is not one of ours. * - * This makes close requests from another process reset - * our thinktime delay. Is generally useful when there are + * This makes close requests from another process update + * our IO history. Is generally useful when there are * two or more cooperating processes working in the same * area. */ - spin_lock(&aic->lock); - aic->last_end_request = jiffies; - spin_unlock(&aic->lock); + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { + if (aic->ttime_samples == 0) + ad->exit_prob = (7*ad->exit_prob + 256)/8; + + ad->exit_no_coop = (7*ad->exit_no_coop)/8; + } + + as_update_iohist(ad, aic, arq->request); return 1; } + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { + /* process anticipated on has exited */ + if (aic->ttime_samples == 0) + ad->exit_prob = (7*ad->exit_prob + 256)/8; + + if (ad->exit_no_coop > 128) + return 1; + } if (aic->ttime_samples == 0) { if (ad->new_ttime_mean > ad->antic_expire) return 1; - if (ad->exit_prob > 128) + if (ad->exit_prob * ad->exit_no_coop > 128*256) return 1; } else if (aic->ttime_mean > ad->antic_expire) { /* the process thinks too much between requests */ return 1; } - if (!arq) - return 0; - - if (ad->last_sector[REQ_SYNC] < arq->request->sector) - s = arq->request->sector - ad->last_sector[REQ_SYNC]; - else - s = ad->last_sector[REQ_SYNC] - arq->request->sector; - - if (aic->seek_samples == 0) { - /* - * Process has just started IO. Use past statistics to - * guage success possibility - */ - if (ad->new_seek_mean > s) { - /* this request is better than what we're expecting */ - return 1; - } - - } else { - if (aic->seek_mean > s) { - /* this request is better than what we're expecting */ - return 1; - } - } - return 0; } @@ -809,94 +918,11 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) * Status is either ANTIC_OFF so start waiting, * ANTIC_WAIT_REQ so continue waiting for request to finish * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request. - * */ return 1; } -static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic, unsigned long ttime) -{ - /* fixed point: 1.0 == 1<<8 */ - if (aic->ttime_samples == 0) { - ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8; - ad->new_ttime_mean = ad->new_ttime_total / 256; - - ad->exit_prob = (7*ad->exit_prob)/8; - } - aic->ttime_samples = (7*aic->ttime_samples + 256) / 8; - aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8; - aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples; -} - -static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, sector_t sdist) -{ - u64 total; - - if (aic->seek_samples == 0) { - ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8; - ad->new_seek_mean = ad->new_seek_total / 256; - } - - /* - * Don't allow the seek distance to get too large from the - * odd fragment, pagein, etc - */ - if (aic->seek_samples <= 60) /* second&third seek */ - sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024); - else - sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64); - - aic->seek_samples = (7*aic->seek_samples + 256) / 8; - aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8; - total = aic->seek_total + (aic->seek_samples/2); - do_div(total, aic->seek_samples); - aic->seek_mean = (sector_t)total; -} - -/* - * as_update_iohist keeps a decaying histogram of IO thinktimes, and - * updates @aic->ttime_mean based on that. It is called when a new - * request is queued. - */ -static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - int data_dir = arq->is_sync; - unsigned long thinktime; - sector_t seek_dist; - - if (aic == NULL) - return; - - if (data_dir == REQ_SYNC) { - unsigned long in_flight = atomic_read(&aic->nr_queued) - + atomic_read(&aic->nr_dispatched); - spin_lock(&aic->lock); - if (test_bit(AS_TASK_IORUNNING, &aic->state) || - test_bit(AS_TASK_IOSTARTED, &aic->state)) { - /* Calculate read -> read thinktime */ - if (test_bit(AS_TASK_IORUNNING, &aic->state) - && in_flight == 0) { - thinktime = jiffies - aic->last_end_request; - thinktime = min(thinktime, MAX_THINKTIME-1); - } else - thinktime = 0; - as_update_thinktime(ad, aic, thinktime); - - /* Calculate read -> read seek distance */ - if (aic->last_request_pos < rq->sector) - seek_dist = rq->sector - aic->last_request_pos; - else - seek_dist = aic->last_request_pos - rq->sector; - as_update_seekdist(ad, aic, seek_dist); - } - aic->last_request_pos = rq->sector + rq->nr_sectors; - set_bit(AS_TASK_IOSTARTED, &aic->state); - spin_unlock(&aic->lock); - } -} - /* * as_update_arq must be called whenever a request (arq) is added to * the sort_list. This function keeps caches up to date, and checks if the @@ -1120,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) /* * take it off the sort and fifo list, add to dispatch queue */ - while (!list_empty(&rq->queuelist)) { - struct request *__rq = list_entry_rq(rq->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_del(&__rq->queuelist); - - elv_dispatch_add_tail(ad->q, __rq); - - if (__arq->io_context && __arq->io_context->aic) - atomic_inc(&__arq->io_context->aic->nr_dispatched); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - __arq->state = AS_RQ_DISPATCHED; - - ad->nr_dispatched++; - } - as_remove_queued_request(ad->q, rq); WARN_ON(arq->state != AS_RQ_QUEUED); @@ -1201,7 +1210,7 @@ static int as_dispatch_request(request_queue_t *q, int force) || ad->changed_batch) return 0; - if (!(reads && writes && as_batch_expired(ad)) ) { + if (!(reads && writes && as_batch_expired(ad))) { /* * batch is still running or no reads or no writes */ @@ -1312,41 +1321,6 @@ fifo_expired: return 1; } -/* - * Add arq to a list behind alias - */ -static inline void -as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alias) -{ - struct request *req = arq->request; - struct list_head *insert = alias->request->queuelist.prev; - - /* - * Transfer list of aliases - */ - while (!list_empty(&req->queuelist)) { - struct request *__rq = list_entry_rq(req->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_move_tail(&__rq->queuelist, &alias->request->queuelist); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - } - - /* - * Another request with the same start sector on the rbtree. - * Link this request to that sector. They are untangled in - * as_move_to_dispatch - */ - list_add(&arq->request->queuelist, insert); - - /* - * Don't want to have to handle merges. - */ - as_del_arq_hash(arq); - arq->request->flags |= REQ_NOMERGE; -} - /* * add arq to rbtree and fifo */ @@ -1354,13 +1328,8 @@ static void as_add_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); - struct as_rq *alias; int data_dir; - if (arq->state != AS_RQ_PRESCHED) { - printk("arq->state: %d\n", arq->state); - WARN_ON(1); - } arq->state = AS_RQ_NEW; if (rq_data_dir(arq->request) == READ @@ -1377,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq) atomic_inc(&arq->io_context->aic->nr_queued); } - alias = as_add_arq_rb(ad, arq); - if (!alias) { - /* - * set expire time (only used for reads) and add to fifo list - */ - arq->expires = jiffies + ad->fifo_expire[data_dir]; - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + as_add_arq_rb(ad, arq); + if (rq_mergeable(arq->request)) + as_add_arq_hash(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); - as_update_arq(ad, arq); /* keep state machine up to date */ - - } else { - as_add_aliased_request(ad, arq, alias); - - /* - * have we been anticipating this request? - * or does it come from the same process as the one we are - * anticipating for? - */ - if (ad->antic_status == ANTIC_WAIT_REQ - || ad->antic_status == ANTIC_WAIT_NEXT) { - if (as_can_break_anticipation(ad, arq)) - as_antic_stop(ad); - } - } + /* + * set expire time (only used for reads) and add to fifo list + */ + arq->expires = jiffies + ad->fifo_expire[data_dir]; + list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + as_update_arq(ad, arq); /* keep state machine up to date */ arq->state = AS_RQ_QUEUED; } @@ -1441,8 +1394,8 @@ static int as_queue_empty(request_queue_t *q) && list_empty(&ad->fifo_list[REQ_SYNC]); } -static struct request * -as_former_request(request_queue_t *q, struct request *rq) +static struct request *as_former_request(request_queue_t *q, + struct request *rq) { struct as_rq *arq = RQ_DATA(rq); struct rb_node *rbprev = rb_prev(&arq->rb_node); @@ -1454,8 +1407,8 @@ as_former_request(request_queue_t *q, struct request *rq) return ret; } -static struct request * -as_latter_request(request_queue_t *q, struct request *rq) +static struct request *as_latter_request(request_queue_t *q, + struct request *rq) { struct as_rq *arq = RQ_DATA(rq); struct rb_node *rbnext = rb_next(&arq->rb_node); @@ -1526,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req) * if the merge was a front merge, we need to reposition request */ if (rq_rb_key(req) != arq->rb_key) { - struct as_rq *alias, *next_arq = NULL; - - if (ad->next_arq[arq->is_sync] == arq) - next_arq = as_find_next_arq(ad, arq); - - /* - * Note! We should really be moving any old aliased requests - * off this request and try to insert them into the rbtree. We - * currently don't bother. Ditto the next function. - */ as_del_arq_rb(ad, arq); - if ((alias = as_add_arq_rb(ad, arq)) ) { - list_del_init(&arq->fifo); - as_add_aliased_request(ad, arq, alias); - if (next_arq) - ad->next_arq[arq->is_sync] = next_arq; - } + as_add_arq_rb(ad, arq); /* * Note! At this stage of this and the next function, our next * request may not be optimal - eg the request may have "grown" @@ -1551,9 +1489,8 @@ static void as_merged_request(request_queue_t *q, struct request *req) } } -static void -as_merged_requests(request_queue_t *q, struct request *req, - struct request *next) +static void as_merged_requests(request_queue_t *q, struct request *req, + struct request *next) { struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(req); @@ -1570,18 +1507,8 @@ as_merged_requests(request_queue_t *q, struct request *req, as_add_arq_hash(ad, arq); if (rq_rb_key(req) != arq->rb_key) { - struct as_rq *alias, *next_arq = NULL; - - if (ad->next_arq[arq->is_sync] == arq) - next_arq = as_find_next_arq(ad, arq); - as_del_arq_rb(ad, arq); - if ((alias = as_add_arq_rb(ad, arq)) ) { - list_del_init(&arq->fifo); - as_add_aliased_request(ad, arq, alias); - if (next_arq) - ad->next_arq[arq->is_sync] = next_arq; - } + as_add_arq_rb(ad, arq); } /* @@ -1600,18 +1527,6 @@ as_merged_requests(request_queue_t *q, struct request *req, } } - /* - * Transfer list of aliases - */ - while (!list_empty(&next->queuelist)) { - struct request *__rq = list_entry_rq(next->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_move_tail(&__rq->queuelist, &req->queuelist); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - } - /* * kill knowledge of next, this one is a goner */ @@ -1806,9 +1721,14 @@ static ssize_t as_est_show(struct as_data *ad, char *page) { int pos = 0; - pos += sprintf(page+pos, "%lu %% exit probability\n", 100*ad->exit_prob/256); + pos += sprintf(page+pos, "%lu %% exit probability\n", + 100*ad->exit_prob/256); + pos += sprintf(page+pos, "%lu %% probability of exiting without a " + "cooperating process submitting IO\n", + 100*ad->exit_no_coop/256); pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean); - pos += sprintf(page+pos, "%llu sectors new seek distance\n", (unsigned long long)ad->new_seek_mean); + pos += sprintf(page+pos, "%llu sectors new seek distance\n", + (unsigned long long)ad->new_seek_mean); return pos; }