SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink
index a623196..ed28462 100644
--- a/mm/slub.c
+++ b/mm/slub.c
  */
 #define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
 
-/* Mininum number of partial slabs */
+/*
+ * Minimum number of partial slabs. These will be left on the partial
+ * lists even if they are empty. kmem_cache_shrink may reclaim them.
+ */
 #define MIN_PARTIAL 2
 
+/*
+ * Maximum number of desirable partial slabs.
+ * The existence of more partial slabs makes kmem_cache_shrink
+ * sort the partial list by the number of objects in use.
+ */
+#define MAX_PARTIAL 10
+
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
                                SLAB_POISON | SLAB_STORE_USER)
 /*
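[Annotation: to make the two constants concrete, here is a minimal sketch of the free-path decision they imply. The helper name putback_slab and its exact placement are illustrative assumptions; that code is not part of the hunks shown, only add_partial and discard_slab appear elsewhere in slub.c.]

/*
 * Hypothetical sketch, not part of this patch: how MIN_PARTIAL is
 * honored when a slab's last object is freed. Up to MIN_PARTIAL
 * empty slabs stay on the partial list as a cheap reserve;
 * kmem_cache_shrink (below) can reclaim them later.
 */
static void putback_slab(struct kmem_cache *s, struct kmem_cache_node *n,
                                struct page *page)
{
        if (page->inuse)
                add_partial(n, page);   /* still has live objects */
        else if (n->nr_partial < MIN_PARTIAL)
                add_partial(n, page);   /* keep a small reserve of empties */
        else
                discard_slab(s, page);  /* return pages to the page allocator */
}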
@@ -1882,7 +1892,7 @@ static int kmem_cache_close(struct kmem_cache *s)
        for_each_online_node(node) {
                struct kmem_cache_node *n = get_node(s, node);
 
-               free_list(s, n, &n->partial);
+               n->nr_partial -= free_list(s, n, &n->partial);
                if (atomic_long_read(&n->nr_slabs))
                        return 1;
        }
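[Annotation: the changed call site implies that free_list now returns how many slabs it freed. A hypothetical sketch of such a helper, consistent with the subtraction above; its body is not part of the hunks shown and the locking details are assumptions:]

/*
 * Hypothetical sketch: discard every empty slab on the list and
 * return the number freed, so the caller can fix up n->nr_partial.
 */
static int free_list(struct kmem_cache *s, struct kmem_cache_node *n,
                                struct list_head *list)
{
        int freed = 0;
        unsigned long flags;
        struct page *page, *t;

        spin_lock_irqsave(&n->list_lock, flags);
        list_for_each_entry_safe(page, t, list, lru)
                if (!page->inuse) {
                        list_del(&page->lru);
                        discard_slab(s, page);
                        freed++;
                }
        spin_unlock_irqrestore(&n->list_lock, flags);
        return freed;
}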
@@ -2130,6 +2140,86 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+/*
+ *  kmem_cache_shrink removes empty slabs from the partial lists
+ *  and then sorts the partially allocated slabs by the number
+ *  of items in use. The slabs with the most items in use
+ *  come first: new allocations fill them up and remove them
+ *  from the partial list. The slabs with the fewest items in
+ *  use are placed last, so that once their remaining objects
+ *  are freed the pages can be returned to the page allocator.
+ */
+int kmem_cache_shrink(struct kmem_cache *s)
+{
+       int node;
+       int i;
+       struct kmem_cache_node *n;
+       struct page *page;
+       struct page *t;
+       struct list_head *slabs_by_inuse =
+               kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+       unsigned long flags;
+
+       if (!slabs_by_inuse)
+               return -ENOMEM;
+
+       flush_all(s);
+       for_each_online_node(node) {
+               n = get_node(s, node);
+
+               if (!n->nr_partial)
+                       continue;
+
+               for (i = 0; i < s->objects; i++)
+                       INIT_LIST_HEAD(slabs_by_inuse + i);
+
+               spin_lock_irqsave(&n->list_lock, flags);
+
+               /*
+                * Build lists indexed by the items in use in
+                * each slab or free slabs if empty.
+                *
+                * Note that concurrent frees may occur while
+                * we hold the list_lock. page->inuse is therefore
+                * only an upper bound on the objects in use.
+                */
+               list_for_each_entry_safe(page, t, &n->partial, lru) {
+                       if (!page->inuse && slab_trylock(page)) {
+                               /*
+                                * Must hold slab lock here because slab_free
+                                * may have freed the last object and be
+                                * waiting to release the slab.
+                                */
+                               list_del(&page->lru);
+                               n->nr_partial--;
+                               slab_unlock(page);
+                               discard_slab(s, page);
+                       } else {
+                               if (n->nr_partial > MAX_PARTIAL)
+                                       list_move(&page->lru,
+                                       slabs_by_inuse + page->inuse);
+                       }
+               }
+
+               if (n->nr_partial <= MAX_PARTIAL)
+                       goto out;
+
+               /*
+                * Rebuild the partial list with the slabs filled up
+                * most first and the least used slabs at the end.
+                */
+               for (i = s->objects - 1; i >= 0; i--)
+                       list_splice(slabs_by_inuse + i, n->partial.prev);
+
+       out:
+               spin_unlock_irqrestore(&n->list_lock, flags);
+       }
+
+       kfree(slabs_by_inuse);
+       return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
  *
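[Annotation: the rebuild step in kmem_cache_shrink above is a counting sort keyed on page->inuse — one list head per possible occupancy, then a descending splice. Below is a self-contained userspace sketch of the same technique; struct slab, sort_partial, and OBJECTS are invented names, a singly linked list stands in for list_head, freeing an empty node stands in for discard_slab, and the MAX_PARTIAL threshold and locking are omitted for brevity.]

#include <stdio.h>
#include <stdlib.h>

#define OBJECTS 8       /* objects per slab; plays the role of s->objects */

struct slab {
        int inuse;              /* objects currently allocated from this slab */
        struct slab *next;      /* singly linked here for brevity */
};

/*
 * Counting sort by inuse, descending: drop empty slabs, bucket the
 * rest by occupancy, then concatenate buckets fullest-first. This
 * mirrors the slabs_by_inuse array in kmem_cache_shrink.
 */
static struct slab *sort_partial(struct slab *head)
{
        struct slab *buckets[OBJECTS] = { 0 }; /* inuse == OBJECTS would be full, off-list */
        struct slab *sorted = NULL, **tail = &sorted;

        while (head) {
                struct slab *s = head;

                head = head->next;
                if (s->inuse == 0) {
                        free(s);                /* empty slab: give the page back */
                        continue;
                }
                s->next = buckets[s->inuse];    /* prepend into its bucket */
                buckets[s->inuse] = s;
        }
        for (int i = OBJECTS - 1; i > 0; i--) { /* fullest first */
                struct slab *s = buckets[i];

                while (s) {
                        struct slab *n = s->next;

                        s->next = NULL;
                        *tail = s;
                        tail = &s->next;
                        s = n;
                }
        }
        return sorted;
}

int main(void)
{
        int inuse[] = { 3, 0, 7, 1, 5, 0, 7 };
        struct slab *head = NULL;

        for (int i = 0; i < 7; i++) {
                struct slab *s = malloc(sizeof(*s));

                s->inuse = inuse[i];
                s->next = head;
                head = s;
        }
        head = sort_partial(head);
        for (struct slab *s = head; s; s = s->next)
                printf("%d ", s->inuse);        /* prints: 7 7 5 3 1 */
        printf("\n");
        return 0;
}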
@@ -2382,17 +2472,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-/***************************************************************
- *     Compatiblility definitions
- **************************************************************/
-
-int kmem_cache_shrink(struct kmem_cache *s)
-{
-       flush_all(s);
-       return 0;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
 #ifdef CONFIG_NUMA
 
 /*****************************************************************
@@ -3169,6 +3248,25 @@ static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
+static ssize_t shrink_show(struct kmem_cache *s, char *buf)
+{
+       return 0;
+}
+
+static ssize_t shrink_store(struct kmem_cache *s,
+                       const char *buf, size_t length)
+{
+       if (buf[0] == '1') {
+               int rc = kmem_cache_shrink(s);
+
+               if (rc)
+                       return rc;
+       } else
+               return -EINVAL;
+       return length;
+}
+SLAB_ATTR(shrink);
+
 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
 {
        if (!(s->flags & SLAB_STORE_USER))
@@ -3225,6 +3323,7 @@ static struct attribute * slab_attrs[] = {
        &poison_attr.attr,
        &store_user_attr.attr,
        &validate_attr.attr,
+       &shrink_attr.attr,
        &alloc_calls_attr.attr,
        &free_calls_attr.attr,
 #ifdef CONFIG_ZONE_DMA
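[Annotation: with the attribute wired into slab_attrs, the shrink can be triggered from userspace by writing "1" to the cache's shrink file, e.g. echo 1 > /sys/slab/dentry/shrink. The cache name is an example, and the path is an assumption for kernels of this era; later kernels moved the SLUB tree to /sys/kernel/slab. The same thing in C:]

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Path and cache name are illustrative assumptions. */
        int fd = open("/sys/slab/dentry/shrink", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* shrink_store only accepts "1"; anything else yields -EINVAL. */
        if (write(fd, "1", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}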