X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=mm%2Fslub.c;h=bd2efae02bcd9cedf262b8dd6e2c79f3fd4b1bec;hb=932c37c375cca25175f9b6acee4c75d7a96d985f;hp=07492a83b46e12018a9eb2738a1d2d073df1d67e;hpb=b345970905e34c1b632fe4d80e2af14c7de99b45;p=powerpc.git diff --git a/mm/slub.c b/mm/slub.c index 07492a83b4..bd2efae02b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -87,6 +87,29 @@ * the fast path. */ +static inline int SlabDebug(struct page *page) +{ +#ifdef CONFIG_SLUB_DEBUG + return PageError(page); +#else + return 0; +#endif +} + +static inline void SetSlabDebug(struct page *page) +{ +#ifdef CONFIG_SLUB_DEBUG + SetPageError(page); +#endif +} + +static inline void ClearSlabDebug(struct page *page) +{ +#ifdef CONFIG_SLUB_DEBUG + ClearPageError(page); +#endif +} + /* * Issues still to be resolved: * @@ -180,7 +203,19 @@ static enum { static DECLARE_RWSEM(slub_lock); LIST_HEAD(slab_caches); -#ifdef CONFIG_SYSFS +/* + * Tracking user of a slab. + */ +struct track { + void *addr; /* Called from address */ + int cpu; /* Was running on cpu */ + int pid; /* Pid context */ + unsigned long when; /* When did the operation occur */ +}; + +enum track_item { TRACK_ALLOC, TRACK_FREE }; + +#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); static void sysfs_slab_remove(struct kmem_cache *); @@ -208,6 +243,23 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, const void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + if (object < base || object >= base + s->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + /* * Slow version of get and set free pointer. * @@ -240,6 +292,14 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +#ifdef CONFIG_SLUB_DEBUG +/* + * Debug settings: + */ +static int slub_debug; + +static char *slub_debug_slabs; + /* * Object debugging */ @@ -275,18 +335,6 @@ static void print_section(char *text, u8 *addr, unsigned int length) } } -/* - * Tracking user of a slab. - */ -struct track { - void *addr; /* Called from address */ - int cpu; /* Was running on cpu */ - int pid; /* Pid context */ - unsigned long when; /* When did the operation occur */ -}; - -enum track_item { TRACK_ALLOC, TRACK_FREE }; - static struct track *get_track(struct kmem_cache *s, void *object, enum track_item alloc) { @@ -421,23 +469,6 @@ static int check_bytes(u8 *start, unsigned int value, unsigned int bytes) return 1; } -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + s->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - /* * Object layout: * @@ -790,6 +821,113 @@ fail: return 0; } +static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) +{ + if (s->flags & SLAB_TRACE) { + printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", + s->name, + alloc ? "alloc" : "free", + object, page->inuse, + page->freelist); + + if (!alloc) + print_section("Object", (void *)object, s->objsize); + + dump_stack(); + } +} + +static int __init setup_slub_debug(char *str) +{ + if (!str || *str != '=') + slub_debug = DEBUG_DEFAULT_FLAGS; + else { + str++; + if (*str == 0 || *str == ',') + slub_debug = DEBUG_DEFAULT_FLAGS; + else + for( ;*str && *str != ','; str++) + switch (*str) { + case 'f' : case 'F' : + slub_debug |= SLAB_DEBUG_FREE; + break; + case 'z' : case 'Z' : + slub_debug |= SLAB_RED_ZONE; + break; + case 'p' : case 'P' : + slub_debug |= SLAB_POISON; + break; + case 'u' : case 'U' : + slub_debug |= SLAB_STORE_USER; + break; + case 't' : case 'T' : + slub_debug |= SLAB_TRACE; + break; + default: + printk(KERN_ERR "slub_debug option '%c' " + "unknown. skipped\n",*str); + } + } + + if (*str == ',') + slub_debug_slabs = str + 1; + return 1; +} + +__setup("slub_debug", setup_slub_debug); + +static void kmem_cache_open_debug_check(struct kmem_cache *s) +{ + /* + * The page->offset field is only 16 bit wide. This is an offset + * in units of words from the beginning of an object. If the slab + * size is bigger then we cannot move the free pointer behind the + * object anymore. + * + * On 32 bit platforms the limit is 256k. On 64bit platforms + * the limit is 512k. + * + * Debugging or ctor/dtors may create a need to move the free + * pointer. Fail if this happens. + */ + if (s->size >= 65535 * sizeof(void *)) { + BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON | + SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); + BUG_ON(s->ctor || s->dtor); + } + else + /* + * Enable debugging if selected on the kernel commandline. + */ + if (slub_debug && (!slub_debug_slabs || + strncmp(slub_debug_slabs, s->name, + strlen(slub_debug_slabs)) == 0)) + s->flags |= slub_debug; +} +#else + +static inline int alloc_object_checks(struct kmem_cache *s, + struct page *page, void *object) { return 0; } + +static inline int free_object_checks(struct kmem_cache *s, + struct page *page, void *object) { return 0; } + +static inline void add_full(struct kmem_cache_node *n, struct page *page) {} +static inline void remove_full(struct kmem_cache *s, struct page *page) {} +static inline void trace(struct kmem_cache *s, struct page *page, + void *object, int alloc) {} +static inline void init_object(struct kmem_cache *s, + void *object, int active) {} +static inline void init_tracking(struct kmem_cache *s, void *object) {} +static inline int slab_pad_check(struct kmem_cache *s, struct page *page) + { return 1; } +static inline int check_object(struct kmem_cache *s, struct page *page, + void *object, int active) { return 1; } +static inline void set_track(struct kmem_cache *s, void *object, + enum track_item alloc, void *addr) {} +static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {} +#define slub_debug 0 +#endif /* * Slab allocation and freeing */ @@ -823,7 +961,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) static void setup_object(struct kmem_cache *s, struct page *page, void *object) { - if (PageError(page)) { + if (SlabDebug(page)) { init_object(s, object, 0); init_tracking(s, object); } @@ -858,7 +996,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | SLAB_TRACE)) - page->flags |= 1 << PG_error; + SetSlabDebug(page); start = page_address(page); end = start + s->objects * s->size; @@ -887,7 +1025,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) { int pages = 1 << s->order; - if (unlikely(PageError(page) || s->dtor)) { + if (unlikely(SlabDebug(page) || s->dtor)) { void *p; slab_pad_check(s, page); @@ -934,7 +1072,8 @@ static void discard_slab(struct kmem_cache *s, struct page *page) atomic_long_dec(&n->nr_slabs); reset_page_mapcount(page); - page->flags &= ~(1 << PG_slab | 1 << PG_error); + ClearSlabDebug(page); + __ClearPageSlab(page); free_slab(s, page); } @@ -1109,7 +1248,7 @@ static void putback_slab(struct kmem_cache *s, struct page *page) if (page->freelist) add_partial(n, page); - else if (PageError(page) && (s->flags & SLAB_STORE_USER)) + else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) add_full(n, page); slab_unlock(page); @@ -1193,7 +1332,7 @@ static void flush_all(struct kmem_cache *s) * per cpu array in the kmem_cache struct. * * Fastpath is not possible if we need to get a new slab or have - * debugging enabled (which means all slabs are marked with PageError) + * debugging enabled (which means all slabs are marked with SlabDebug) */ static void *slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr) @@ -1216,7 +1355,7 @@ redo: object = page->freelist; if (unlikely(!object)) goto another_slab; - if (unlikely(PageError(page))) + if (unlikely(SlabDebug(page))) goto debug; have_object: @@ -1273,12 +1412,7 @@ debug: goto another_slab; if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_ALLOC, addr); - if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n", - s->name, object, page->inuse, - page->freelist); - dump_stack(); - } + trace(s, page, object, 1); init_object(s, object, 1); goto have_object; } @@ -1314,7 +1448,7 @@ static void slab_free(struct kmem_cache *s, struct page *page, local_irq_save(flags); slab_lock(page); - if (unlikely(PageError(page))) + if (unlikely(SlabDebug(page))) goto debug; checks_ok: prior = object[page->offset] = page->freelist; @@ -1363,13 +1497,7 @@ debug: remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, x, TRACK_FREE, addr); - if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n", - s->name, object, page->inuse, - page->freelist); - print_section("Object", (void *)object, s->objsize); - dump_stack(); - } + trace(s, page, object, 0); init_object(s, object, 0); goto checks_ok; } @@ -1424,13 +1552,6 @@ static int slub_min_objects = DEFAULT_MIN_OBJECTS; */ static int slub_nomerge; -/* - * Debug settings: - */ -static int slub_debug; - -static char *slub_debug_slabs; - /* * Calculate the order of allocation given an slab object size. * @@ -1456,34 +1577,75 @@ static char *slub_debug_slabs; * requested a higher mininum order then we start with that one instead of * the smallest order which will fit the object. */ -static int calculate_order(int size) +static inline int slab_order(int size, int min_objects, + int max_order, int fract_leftover) { int order; int rem; - for (order = max(slub_min_order, fls(size - 1) - PAGE_SHIFT); - order < MAX_ORDER; order++) { - unsigned long slab_size = PAGE_SIZE << order; + for (order = max(slub_min_order, + fls(min_objects * size - 1) - PAGE_SHIFT); + order <= max_order; order++) { - if (slub_max_order > order && - slab_size < slub_min_objects * size) - continue; + unsigned long slab_size = PAGE_SIZE << order; - if (slab_size < size) + if (slab_size < min_objects * size) continue; rem = slab_size % size; - if (rem <= slab_size / 8) + if (rem <= slab_size / fract_leftover) break; } - if (order >= MAX_ORDER) - return -E2BIG; return order; } +static inline int calculate_order(int size) +{ + int order; + int min_objects; + int fraction; + + /* + * Attempt to find best configuration for a slab. This + * works by first attempting to generate a layout with + * the best configuration and backing off gradually. + * + * First we reduce the acceptable waste in a slab. Then + * we reduce the minimum objects required in a slab. + */ + min_objects = slub_min_objects; + while (min_objects > 1) { + fraction = 8; + while (fraction >= 4) { + order = slab_order(size, min_objects, + slub_max_order, fraction); + if (order <= slub_max_order) + return order; + fraction /= 2; + } + min_objects /= 2; + } + + /* + * We were unable to place multiple objects in a slab. Now + * lets see if we can place a single object there. + */ + order = slab_order(size, 1, slub_max_order, 1); + if (order <= slub_max_order) + return order; + + /* + * Doh this slab cannot be placed using slub_max_order. + */ + order = slab_order(size, 1, MAX_ORDER, 1); + if (order <= MAX_ORDER) + return order; + return -ENOSYS; +} + /* * Figure out what the alignment of the objects will be. */ @@ -1639,6 +1801,7 @@ static int calculate_sizes(struct kmem_cache *s) */ size = ALIGN(size, sizeof(void *)); +#ifdef CONFIG_SLUB_DEBUG /* * If we are Redzoning then check if there is some space between the * end of the object and the free pointer. If not then add an @@ -1646,6 +1809,7 @@ static int calculate_sizes(struct kmem_cache *s) */ if ((flags & SLAB_RED_ZONE) && size == s->objsize) size += sizeof(void *); +#endif /* * With that we have determined the number of bytes in actual use @@ -1653,6 +1817,7 @@ static int calculate_sizes(struct kmem_cache *s) */ s->inuse = size; +#ifdef CONFIG_SLUB_DEBUG if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || s->ctor || s->dtor)) { /* @@ -1683,6 +1848,7 @@ static int calculate_sizes(struct kmem_cache *s) * of the object. */ size += sizeof(void *); +#endif /* * Determine the alignment based on various parameters that the @@ -1732,32 +1898,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, s->objsize = size; s->flags = flags; s->align = align; - - /* - * The page->offset field is only 16 bit wide. This is an offset - * in units of words from the beginning of an object. If the slab - * size is bigger then we cannot move the free pointer behind the - * object anymore. - * - * On 32 bit platforms the limit is 256k. On 64bit platforms - * the limit is 512k. - * - * Debugging or ctor/dtors may create a need to move the free - * pointer. Fail if this happens. - */ - if (s->size >= 65535 * sizeof(void *)) { - BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | - SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); - BUG_ON(ctor || dtor); - } - else - /* - * Enable debugging if selected on the kernel commandline. - */ - if (slub_debug && (!slub_debug_slabs || - strncmp(slub_debug_slabs, name, - strlen(slub_debug_slabs)) == 0)) - s->flags |= slub_debug; + kmem_cache_open_debug_check(s); if (!calculate_sizes(s)) goto error; @@ -1928,45 +2069,6 @@ static int __init setup_slub_nomerge(char *str) __setup("slub_nomerge", setup_slub_nomerge); -static int __init setup_slub_debug(char *str) -{ - if (!str || *str != '=') - slub_debug = DEBUG_DEFAULT_FLAGS; - else { - str++; - if (*str == 0 || *str == ',') - slub_debug = DEBUG_DEFAULT_FLAGS; - else - for( ;*str && *str != ','; str++) - switch (*str) { - case 'f' : case 'F' : - slub_debug |= SLAB_DEBUG_FREE; - break; - case 'z' : case 'Z' : - slub_debug |= SLAB_RED_ZONE; - break; - case 'p' : case 'P' : - slub_debug |= SLAB_POISON; - break; - case 'u' : case 'U' : - slub_debug |= SLAB_STORE_USER; - break; - case 't' : case 'T' : - slub_debug |= SLAB_TRACE; - break; - default: - printk(KERN_ERR "slub_debug option '%c' " - "unknown. skipped\n",*str); - } - } - - if (*str == ',') - slub_debug_slabs = str + 1; - return 1; -} - -__setup("slub_debug", setup_slub_debug); - static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, const char *name, int size, gfp_t gfp_flags) { @@ -2184,7 +2286,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); /** * krealloc - reallocate memory. The contents will remain unchanged. - * * @p: object to reallocate memory for. * @new_size: how many bytes of memory are required. * @flags: the type of memory to allocate. @@ -2413,7 +2514,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: for_all_slabs(__flush_cpu_slab, cpu); break; default: @@ -2427,91 +2530,6 @@ static struct notifier_block __cpuinitdata slab_notifier = #endif -#ifdef CONFIG_NUMA - -/***************************************************************** - * Generic reaper used to support the page allocator - * (the cpu slabs are reaped by a per slab workqueue). - * - * Maybe move this to the page allocator? - ****************************************************************/ - -static DEFINE_PER_CPU(unsigned long, reap_node); - -static void init_reap_node(int cpu) -{ - int node; - - node = next_node(cpu_to_node(cpu), node_online_map); - if (node == MAX_NUMNODES) - node = first_node(node_online_map); - - __get_cpu_var(reap_node) = node; -} - -static void next_reap_node(void) -{ - int node = __get_cpu_var(reap_node); - - /* - * Also drain per cpu pages on remote zones - */ - if (node != numa_node_id()) - drain_node_pages(node); - - node = next_node(node, node_online_map); - if (unlikely(node >= MAX_NUMNODES)) - node = first_node(node_online_map); - __get_cpu_var(reap_node) = node; -} -#else -#define init_reap_node(cpu) do { } while (0) -#define next_reap_node(void) do { } while (0) -#endif - -#define REAPTIMEOUT_CPUC (2*HZ) - -#ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct delayed_work, reap_work); - -static void cache_reap(struct work_struct *unused) -{ - next_reap_node(); - refresh_cpu_vm_stats(smp_processor_id()); - schedule_delayed_work(&__get_cpu_var(reap_work), - REAPTIMEOUT_CPUC); -} - -static void __devinit start_cpu_timer(int cpu) -{ - struct delayed_work *reap_work = &per_cpu(reap_work, cpu); - - /* - * When this gets called from do_initcalls via cpucache_init(), - * init_workqueues() has already run, so keventd will be setup - * at that time. - */ - if (keventd_up() && reap_work->work.func == NULL) { - init_reap_node(cpu); - INIT_DELAYED_WORK(reap_work, cache_reap); - schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); - } -} - -static int __init cpucache_init(void) -{ - int cpu; - - /* - * Register the timers that drain pcp pages and update vm statistics - */ - for_each_online_cpu(cpu) - start_cpu_timer(cpu); - return 0; -} -__initcall(cpucache_init); -#endif - void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) { struct kmem_cache *s = get_slab(size, gfpflags); @@ -2533,8 +2551,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return slab_alloc(s, gfpflags, node, caller); } -#ifdef CONFIG_SYSFS - +#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) static int validate_slab(struct kmem_cache *s, struct page *page) { void *p; @@ -2571,12 +2588,12 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page) s->name, page); if (s->flags & DEBUG_DEFAULT_FLAGS) { - if (!PageError(page)) - printk(KERN_ERR "SLUB %s: PageError not set " + if (!SlabDebug(page)) + printk(KERN_ERR "SLUB %s: SlabDebug not set " "on slab 0x%p\n", s->name, page); } else { - if (PageError(page)) - printk(KERN_ERR "SLUB %s: PageError set on " + if (SlabDebug(page)) + printk(KERN_ERR "SLUB %s: SlabDebug set on " "slab 0x%p\n", s->name, page); } } @@ -2691,6 +2708,13 @@ static void resiliency_test(void) {}; struct location { unsigned long count; void *addr; + long long sum_time; + long min_time; + long max_time; + long min_pid; + long max_pid; + cpumask_t cpus; + nodemask_t nodes; }; struct loc_track { @@ -2731,11 +2755,12 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max) } static int add_location(struct loc_track *t, struct kmem_cache *s, - void *addr) + const struct track *track) { long start, end, pos; struct location *l; void *caddr; + unsigned long age = jiffies - track->when; start = -1; end = t->count; @@ -2751,12 +2776,29 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, break; caddr = t->loc[pos].addr; - if (addr == caddr) { - t->loc[pos].count++; + if (track->addr == caddr) { + + l = &t->loc[pos]; + l->count++; + if (track->when) { + l->sum_time += age; + if (age < l->min_time) + l->min_time = age; + if (age > l->max_time) + l->max_time = age; + + if (track->pid < l->min_pid) + l->min_pid = track->pid; + if (track->pid > l->max_pid) + l->max_pid = track->pid; + + cpu_set(track->cpu, l->cpus); + } + node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; } - if (addr < caddr) + if (track->addr < caddr) end = pos; else start = pos; @@ -2774,7 +2816,16 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, (t->count - pos) * sizeof(struct location)); t->count++; l->count = 1; - l->addr = addr; + l->addr = track->addr; + l->sum_time = age; + l->min_time = age; + l->max_time = age; + l->min_pid = track->pid; + l->max_pid = track->pid; + cpus_clear(l->cpus); + cpu_set(track->cpu, l->cpus); + nodes_clear(l->nodes); + node_set(page_to_nid(virt_to_page(track)), l->nodes); return 1; } @@ -2790,11 +2841,8 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, set_bit(slab_index(p, s, addr), map); for_each_object(p, s, addr) - if (!test_bit(slab_index(p, s, addr), map)) { - void *addr = get_track(s, p, alloc)->addr; - - add_location(t, s, addr); - } + if (!test_bit(slab_index(p, s, addr), map)) + add_location(t, s, get_track(s, p, alloc)); } static int list_locations(struct kmem_cache *s, char *buf, @@ -2828,15 +2876,47 @@ static int list_locations(struct kmem_cache *s, char *buf, } for (i = 0; i < t.count; i++) { - void *addr = t.loc[i].addr; + struct location *l = &t.loc[i]; if (n > PAGE_SIZE - 100) break; - n += sprintf(buf + n, "%7ld ", t.loc[i].count); - if (addr) - n += sprint_symbol(buf + n, (unsigned long)t.loc[i].addr); + n += sprintf(buf + n, "%7ld ", l->count); + + if (l->addr) + n += sprint_symbol(buf + n, (unsigned long)l->addr); else n += sprintf(buf + n, ""); + + if (l->sum_time != l->min_time) { + unsigned long remainder; + + n += sprintf(buf + n, " age=%ld/%ld/%ld", + l->min_time, + div_long_long_rem(l->sum_time, l->count, &remainder), + l->max_time); + } else + n += sprintf(buf + n, " age=%ld", + l->min_time); + + if (l->min_pid != l->max_pid) + n += sprintf(buf + n, " pid=%ld-%ld", + l->min_pid, l->max_pid); + else + n += sprintf(buf + n, " pid=%ld", + l->min_pid); + + if (num_online_cpus() > 1 && !cpus_empty(l->cpus)) { + n += sprintf(buf + n, " cpus="); + n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50, + l->cpus); + } + + if (num_online_nodes() > 1 && !nodes_empty(l->nodes)) { + n += sprintf(buf + n, " nodes="); + n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50, + l->nodes); + } + n += sprintf(buf + n, "\n"); }