#include <linux/sort.h>
#include <linux/pfn.h>
#include <linux/backing-dev.h>
+#include <linux/fault-inject.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
* don't need any ZONE_NORMAL reservation
*/
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+#ifdef CONFIG_ZONE_DMA
256,
+#endif
#ifdef CONFIG_ZONE_DMA32
256,
#endif
EXPORT_SYMBOL(totalram_pages);
-static char *zone_names[MAX_NR_ZONES] = {
+static char * const zone_names[MAX_NR_ZONES] = {
+#ifdef CONFIG_ZONE_DMA
"DMA",
+#endif
#ifdef CONFIG_ZONE_DMA32
"DMA32",
#endif
static int page_is_consistent(struct zone *zone, struct page *page)
{
-#ifdef CONFIG_HOLES_IN_ZONE
- if (!pfn_valid(page_to_pfn(page)))
+ if (!pfn_valid_within(page_to_pfn(page)))
return 0;
-#endif
if (zone != page_zone(page))
return 0;
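/*
 * For reference, a minimal sketch of the helper relied on above, as
 * defined in include/linux/mmzone.h in this era: pfn_valid_within()
 * only degrades to a real pfn_valid() check when zones may contain
 * holes, so the old #ifdef at every call site becomes unnecessary.
 */
#ifdef CONFIG_HOLES_IN_ZONE
#define pfn_valid_within(pfn) pfn_valid(pfn)
#else
#define pfn_valid_within(pfn) (1)
#endif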
static void free_compound_page(struct page *page)
{
- __free_pages_ok(page, (unsigned long)page[1].lru.prev);
+ __free_pages_ok(page, compound_order(page));
}
static void prep_compound_page(struct page *page, unsigned long order)
int i;
int nr_pages = 1 << order;
- page[1].lru.next = (void *)free_compound_page; /* set dtor */
- page[1].lru.prev = (void *)order;
- for (i = 0; i < nr_pages; i++) {
+ set_compound_page_dtor(page, free_compound_page);
+ set_compound_order(page, order);
+ __SetPageHead(page);
+ for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- __SetPageCompound(p);
- set_page_private(p, (unsigned long)page);
+ __SetPageTail(p);
+ p->first_page = page;
}
}
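/*
 * A sketch of the compound-page accessors used above, roughly as they
 * appear in include/linux/mm.h at this point in the series: the
 * destructor and the order are stashed in otherwise-unused lru fields
 * of the first tail page, replacing the open-coded casts.
 */
typedef void compound_page_dtor(struct page *);

static inline void set_compound_page_dtor(struct page *page,
						compound_page_dtor *dtor)
{
	page[1].lru.next = (void *)dtor;
}

static inline unsigned long compound_order(struct page *page)
{
	return (unsigned long)page[1].lru.prev;
}

static inline void set_compound_order(struct page *page, unsigned long order)
{
	page[1].lru.prev = (void *)order;
}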
int i;
int nr_pages = 1 << order;
- if (unlikely((unsigned long)page[1].lru.prev != order))
+ if (unlikely(compound_order(page) != order))
bad_page(page);
- for (i = 0; i < nr_pages; i++) {
+ if (unlikely(!PageHead(page)))
+ bad_page(page);
+ __ClearPageHead(page);
+ for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- if (unlikely(!PageCompound(p) |
- (page_private(p) != (unsigned long)page)))
+ if (unlikely(!PageTail(p) |
+ (p->first_page != page)))
bad_page(page);
- __ClearPageCompound(p);
+ __ClearPageTail(p);
}
}
static inline int page_is_buddy(struct page *page, struct page *buddy,
int order)
{
-#ifdef CONFIG_HOLES_IN_ZONE
- if (!pfn_valid(page_to_pfn(buddy)))
+ if (!pfn_valid_within(page_to_pfn(buddy)))
return 0;
-#endif
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
VM_BUG_ON(page_idx & (order_size - 1));
VM_BUG_ON(bad_range(zone, page));
- zone->free_pages += order_size;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
while (order < MAX_ORDER-1) {
unsigned long combined_idx;
struct free_area *area;
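/*
 * The open-coded zone->free_pages arithmetic gives way to the zoned VM
 * counters (ZVC). A condensed sketch of the machinery, following
 * include/linux/vmstat.h and mm/vmstat.c of this era: writers batch
 * deltas in a per-cpu s8 and fold them into the zone-wide atomic only
 * when a threshold is crossed, keeping the hot paths cheap.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
	s8 *p = pcp->vm_stat_diff + item;
	long x = delta + *p;

	if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
		zone_page_state_add(x, zone, item); /* fold into zone->vm_stat */
		x = 0;
	}
	*p = x;
}

static inline unsigned long zone_page_state(struct zone *zone,
					enum zone_stat_item item)
{
	long x = atomic_long_read(&zone->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)	/* unfolded per-cpu deltas can leave it negative */
		x = 0;
#endif
	return x;
}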
1 << PG_private |
1 << PG_locked |
1 << PG_active |
- 1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved |
1 << PG_buddy ))))
bad_page(page);
+ /*
+ * PageReclaim == PageTail. It is only an error
+ * for PageReclaim to be set if PageCompound is clear.
+ */
+ if (unlikely(!PageCompound(page) && PageReclaim(page)))
+ bad_page(page);
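/*
 * Why "PageReclaim == PageTail": a sketch of the flag aliasing in
 * include/linux/page-flags.h for this series. PG_reclaim is reused, in
 * combination with PG_compound, to tell head pages from tail pages:
 *
 *	PG_compound set, PG_reclaim clear => head page
 *	PG_compound set, PG_reclaim set   => tail page
 */
#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))

#define PageHead(page)	(((page)->flags & PG_head_tail_mask) == \
				(1L << PG_compound))
#define PageTail(page)	(((page)->flags & PG_head_tail_mask) == \
				PG_head_tail_mask)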
if (PageDirty(page))
__ClearPageDirty(page);
/*
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
- 1 << PG_checked | 1 << PG_mappedtodisk);
+ 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
set_page_private(page, 0);
set_page_refcounted(page);
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
- zone->free_pages -= 1UL << order;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));

expand(zone, page, order, current_order, area);
return page;
}
return i;
}
+#if MAX_NUMNODES > 1
+int nr_node_ids __read_mostly = MAX_NUMNODES;
+EXPORT_SYMBOL(nr_node_ids);
+
+/*
+ * Figure out the number of possible node ids.
+ */
+static void __init setup_nr_node_ids(void)
+{
+ unsigned int node;
+ unsigned int highest = 0;
+
+ for_each_node_mask(node, node_possible_map)
+ highest = node;
+ nr_node_ids = highest + 1;
+}
+#else
+static void __init setup_nr_node_ids(void) {}
+#endif
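/*
 * A hypothetical caller, to illustrate what nr_node_ids buys: per-node
 * arrays can be sized by the number of possible node ids actually
 * present rather than the compile-time MAX_NUMNODES. The function name
 * below is illustrative, not part of this patch.
 */
static void **alloc_per_node_ptrs(void)
{
	/* one slot per possible node id, zeroed */
	return kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
}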
+
#ifdef CONFIG_NUMA
/*
* Called from the slab reaper to drain pagesets on a particular node that
pcp = &pset->pcp[i];
if (pcp->count) {
+ int to_drain;
+
local_irq_save(flags);
- free_pages_bulk(zone, pcp->count, &pcp->list, 0);
- pcp->count = 0;
+ if (pcp->count >= pcp->batch)
+ to_drain = pcp->batch;
+ else
+ to_drain = pcp->count;
+ free_pages_bulk(zone, to_drain, &pcp->list, 0);
+ pcp->count -= to_drain;
local_irq_restore(flags);
}
}
}
#endif
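/*
 * For reference, the per-cpu pool drained above, as laid out in
 * include/linux/mmzone.h. Capping each IRQs-off pass at pcp->batch
 * pages bounds how long interrupts stay disabled when the pool is
 * large, instead of flushing all pcp->count pages in one go.
 */
struct per_cpu_pages {
	int count;		/* number of pages in the list */
	int high;		/* high watermark, emptying needed */
	int batch;		/* chunk size for buddy add/remove */
	struct list_head list;	/* the list of pages */
};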
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
static void __drain_pages(unsigned int cpu)
{
unsigned long flags;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ if (!populated_zone(zone))
+ continue;
+
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
}
}
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_PM
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
- if (!PageNosave(page))
- ClearPageNosaveFree(page);
+ if (!swsusp_page_is_forbidden(page))
+ swsusp_unset_page_free(page);
}
for (order = MAX_ORDER - 1; order >= 0; --order)
pfn = page_to_pfn(list_entry(curr, struct page, lru));
for (i = 0; i < (1UL << order); i++)
- SetPageNosaveFree(pfn_to_page(pfn + i));
+ swsusp_set_page_free(pfn_to_page(pfn + i));
}
spin_unlock_irqrestore(&zone->lock, flags);
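/*
 * The swsusp_* helpers above hide the raw PG_nosave bit operations
 * behind names that say what suspend means by them; a sketch of the
 * wrappers as introduced alongside this change (the PG_nosave
 * semantics themselves are unchanged):
 */
void swsusp_set_page_free(struct page *page)
{
	SetPageNosaveFree(page);
}

void swsusp_unset_page_free(struct page *page)
{
	ClearPageNosaveFree(page);
}

int swsusp_page_is_forbidden(struct page *page)
{
	return PageNosave(page);
}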
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+#ifdef CONFIG_FAIL_PAGE_ALLOC
+
+static struct fail_page_alloc_attr {
+ struct fault_attr attr;
+
+ u32 ignore_gfp_highmem;
+ u32 ignore_gfp_wait;
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+ struct dentry *ignore_gfp_highmem_file;
+ struct dentry *ignore_gfp_wait_file;
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+} fail_page_alloc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+ .ignore_gfp_wait = 1,
+ .ignore_gfp_highmem = 1,
+};
+
+static int __init setup_fail_page_alloc(char *str)
+{
+ return setup_fault_attr(&fail_page_alloc.attr, str);
+}
+__setup("fail_page_alloc=", setup_fail_page_alloc);
+
+static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+{
+ if (gfp_mask & __GFP_NOFAIL)
+ return 0;
+ if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
+ return 0;
+ if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
+ return 0;
+
+ return should_fail(&fail_page_alloc.attr, 1 << order);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_page_alloc_debugfs(void)
+{
+ mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ struct dentry *dir;
+ int err;
+
+ err = init_fault_attr_dentries(&fail_page_alloc.attr,
+ "fail_page_alloc");
+ if (err)
+ return err;
+ dir = fail_page_alloc.attr.dentries.dir;
+
+ fail_page_alloc.ignore_gfp_wait_file =
+ debugfs_create_bool("ignore-gfp-wait", mode, dir,
+ &fail_page_alloc.ignore_gfp_wait);
+
+ fail_page_alloc.ignore_gfp_highmem_file =
+ debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+ &fail_page_alloc.ignore_gfp_highmem);
+
+ if (!fail_page_alloc.ignore_gfp_wait_file ||
+ !fail_page_alloc.ignore_gfp_highmem_file) {
+ err = -ENOMEM;
+ debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
+ debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
+ cleanup_fault_attr_dentries(&fail_page_alloc.attr);
+ }
+
+ return err;
+}
+
+late_initcall(fail_page_alloc_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+#else /* CONFIG_FAIL_PAGE_ALLOC */
+
+static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+{
+ return 0;
+}
+
+#endif /* CONFIG_FAIL_PAGE_ALLOC */
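/*
 * Usage sketch for the fault-injection hooks above (the parameter
 * format is the generic fault_attr tuple documented in
 * Documentation/fault-injection/fault-injection.txt); paths assume
 * debugfs is mounted at /sys/kernel/debug.
 *
 *	# boot-time: fail ~10% of eligible allocations, forever
 *	fail_page_alloc=1,10,0,-1	(interval,probability,space,times)
 *
 *	# run-time: also inject into __GFP_WAIT allocations
 *	echo 0 > /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait
 */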
+
/*
* Return 1 if free pages are above 'mark'. This takes into account the order
* of the allocation.
int classzone_idx, int alloc_flags)
{
- /* free_pages my go negative - that's OK */
+ /* free_pages may go negative - that's OK */
- unsigned long min = mark;
- long free_pages = z->free_pages - (1 << order) + 1;
+ long min = mark;
+ long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
int o;
if (alloc_flags & ALLOC_HIGH)
zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
break;
if ((alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed(zone, gfp_mask))
+ !cpuset_zone_allowed_softwall(zone, gfp_mask))
goto try_next_zone;
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
might_sleep_if(wait);
+ if (should_fail_alloc_page(gfp_mask, order))
+ return NULL;
+
restart:
z = zonelist->zones; /* the list of zones suitable for gfp_mask */
if (page)
goto got_pg;
+ /*
+ * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
+ * __GFP_NOWARN set) should not cause reclaim, since the subsystem
+ * (e.g. slab) using GFP_THISNODE may choose to trigger reclaim
+ * using a larger set of nodes after it has established that the
+ * allowed per-node queues are empty and that nodes are
+ * over-allocated.
+ */
+ if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+ goto nopage;
+
for (z = zonelist->zones; *z; z++)
wakeup_kswapd(*z, order);
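/*
 * For reference, the mask the new early exit tests, as defined in
 * include/linux/gfp.h. On !NUMA builds GFP_THISNODE collapses to 0,
 * and (gfp_mask & 0) == 0 would match every allocation - hence the
 * additional compile-time NUMA_BUILD guard above.
 */
#ifdef CONFIG_NUMA
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN)
#else
#define GFP_THISNODE	((__force gfp_t)0)
#endif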
/* This allocation should allow future memory freeing. */
+rebalance:
if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
if (!wait)
goto nopage;
-rebalance:
cond_resched();
/* We now go into synchronous reclaim */
EXPORT_SYMBOL(free_pages);
-/*
- * Total amount of free (allocatable) RAM:
- */
-unsigned int nr_free_pages(void)
-{
- unsigned int sum = 0;
- struct zone *zone;
-
- for_each_zone(zone)
- sum += zone->free_pages;
-
- return sum;
-}
-
-EXPORT_SYMBOL(nr_free_pages);
-
-#ifdef CONFIG_NUMA
-unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
-{
- unsigned int sum = 0;
- enum zone_type i;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- sum += pgdat->node_zones[i].free_pages;
-
- return sum;
-}
-#endif
-
static unsigned int nr_free_zone_pages(int offset)
{
/* Just pick one node, since fallback list is circular */
static inline void show_node(struct zone *zone)
{
if (NUMA_BUILD)
- printk("Node %ld ", zone_to_nid(zone));
+ printk("Node %d ", zone_to_nid(zone));
}
void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages;
val->sharedram = 0;
- val->freeram = nr_free_pages();
+ val->freeram = global_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
val->totalhigh = totalhigh_pages;
val->freehigh = nr_free_highpages();
pg_data_t *pgdat = NODE_DATA(nid);
val->totalram = pgdat->node_present_pages;
- val->freeram = nr_free_pages_pgdat(pgdat);
+ val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
- val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+ val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+ NR_FREE_PAGES);
#else
val->totalhigh = 0;
val->freehigh = 0;
void show_free_areas(void)
{
int cpu;
- unsigned long active;
- unsigned long inactive;
- unsigned long free;
struct zone *zone;
for_each_zone(zone) {
}
}
- get_zone_counts(&active, &inactive, &free);
-
- printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
- "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
- active,
- inactive,
+ printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+ " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+ global_page_state(NR_ACTIVE),
+ global_page_state(NR_INACTIVE),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
- nr_free_pages(),
+ global_page_state(NR_FREE_PAGES),
global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE),
global_page_state(NR_FILE_MAPPED),
- global_page_state(NR_PAGETABLE));
+ global_page_state(NR_PAGETABLE),
+ global_page_state(NR_BOUNCE));
for_each_zone(zone) {
int i;
" all_unreclaimable? %s"
"\n",
zone->name,
- K(zone->free_pages),
+ K(zone_page_state(zone, NR_FREE_PAGES)),
K(zone->pages_min),
K(zone->pages_low),
K(zone->pages_high),
- K(zone->nr_active),
- K(zone->nr_inactive),
+ K(zone_page_state(zone, NR_ACTIVE)),
+ K(zone_page_state(zone, NR_INACTIVE)),
K(zone->present_pages),
zone->pages_scanned,
(zone->all_unreclaimable ? "yes" : "no")
* done. Non-atomic initialization, single-pass.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn)
+ unsigned long start_pfn, enum memmap_context context)
{
struct page *page;
unsigned long end_pfn = start_pfn + size;
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
- if (!early_pfn_valid(pfn))
- continue;
- if (!early_pfn_in_nid(pfn, nid))
- continue;
+ /*
+ * There can be holes in boot-time mem_map[]s
+ * handed to this function. They do not
+ * exist on hotplugged memory.
+ */
+ if (context == MEMMAP_EARLY) {
+ if (!early_pfn_valid(pfn))
+ continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
+ }
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(size, nid, zone, start_pfn) \
- memmap_init_zone((size), (nid), (zone), (start_pfn))
+ memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
#endif
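/*
 * The new context argument distinguishes boot-time initialisation from
 * memory hotplug; a sketch of the enum added to include/linux/mmzone.h:
 */
enum memmap_context {
	MEMMAP_EARLY,
	MEMMAP_HOTPLUG,
};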
static int __cpuinit zone_batchsize(struct zone *zone)
int ret = NOTIFY_OK;
switch (action) {
- case CPU_UP_PREPARE:
- if (process_zones(cpu))
- ret = NOTIFY_BAD;
- break;
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- free_zone_pagesets(cpu);
- break;
- default:
- break;
+ case CPU_UP_PREPARE:
+ if (process_zones(cpu))
+ ret = NOTIFY_BAD;
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ free_zone_pagesets(cpu);
+ break;
+ default:
+ break;
}
return ret;
}
__meminit int init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
- unsigned long size)
+ unsigned long size,
+ enum memmap_context context)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int ret;
" %s zone: %lu pages exceeds realsize %lu\n",
zone_names[j], memmap_pages, realsize);
- /* Account for reserved DMA pages */
- if (j == ZONE_DMA && realsize > dma_reserve) {
+ /* Account for reserved pages */
+ if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
- printk(KERN_DEBUG " DMA zone: %lu pages reserved\n",
- dma_reserve);
+ printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
+ zone_names[0], dma_reserve);
}
if (!is_highmem_idx(j))
spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
- zone->free_pages = 0;
zone->prev_priority = DEF_PRIORITY;
INIT_LIST_HEAD(&zone->inactive_list);
zone->nr_scan_active = 0;
zone->nr_scan_inactive = 0;
- zone->nr_active = 0;
- zone->nr_inactive = 0;
zap_zone_vm_stats(zone);
atomic_set(&zone->reclaim_in_progress, 0);
if (!size)
continue;
- ret = init_currently_empty_zone(zone, zone_start_pfn, size);
+ ret = init_currently_empty_zone(zone, zone_start_pfn,
+ size, MEMMAP_EARLY);
BUG_ON(ret);
zone_start_pfn += size;
}
cmp_node_active_region, NULL);
}
-/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+/* Find the lowest pfn for a node */
unsigned long __init find_min_pfn_for_node(unsigned long nid)
{
int i;
-
- /* Regions in the early_node_map can be in any order */
- sort_node_map();
+ unsigned long min_pfn = ULONG_MAX;
- /* Assuming a sorted map, the first range found has the starting pfn */
+ /* The map may not be sorted yet, so take the minimum over all ranges */
for_each_active_range_index_in_nid(i, nid)
- return early_node_map[i].start_pfn;
+ min_pfn = min(min_pfn, early_node_map[i].start_pfn);
- printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
- return 0;
+ if (min_pfn == ULONG_MAX) {
+ printk(KERN_WARNING
+ "Could not find start_pfn for node %lu\n", nid);
+ return 0;
+ }
+
+ return min_pfn;
}
/**
unsigned long nid;
enum zone_type i;
+ /* Sort early_node_map as initialisation assumes it is sorted */
+ sort_node_map();
+
/* Record where the zone boundaries are */
memset(arch_zone_lowest_possible_pfn, 0,
sizeof(arch_zone_lowest_possible_pfn));
early_node_map[i].end_pfn);
/* Initialise every node */
+ setup_nr_node_ids();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, pgdat, NULL,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}
-#ifdef CONFIG_HOTPLUG_CPU
static int page_alloc_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
}
return NOTIFY_OK;
}
-#endif /* CONFIG_HOTPLUG_CPU */
void __init page_alloc_init(void)
{
struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, file, buffer, length, ppos);
- setup_per_zone_pages_min();
+ if (write)
+ setup_per_zone_pages_min();
return 0;
}
/* allow the kernel cmdline to have a say */
if (!numentries) {
/* round applicable memory size up to nearest megabyte */
- numentries = (flags & HASH_HIGHMEM) ? nr_all_pages : nr_kernel_pages;
+ numentries = nr_kernel_pages;
numentries += (1UL << (20 - PAGE_SHIFT)) - 1;
numentries >>= 20 - PAGE_SHIFT;
numentries <<= 20 - PAGE_SHIFT;
numentries >>= (scale - PAGE_SHIFT);
else
numentries <<= (PAGE_SHIFT - scale);
+
+ /* Make sure we've got at least a 0-order allocation.. */
+ if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+ numentries = PAGE_SIZE / bucketsize;
}
numentries = roundup_pow_of_two(numentries);
if (numentries > max)
numentries = max;
- log2qty = long_log2(numentries);
+ log2qty = ilog2(numentries);
do {
size = bucketsize << log2qty;
printk("%s hash table entries: %d (order: %d, %lu bytes)\n",
tablename,
(1U << log2qty),
- long_log2(size) - PAGE_SHIFT,
+ ilog2(size) - PAGE_SHIFT,
size);
if (_hash_shift)
EXPORT_SYMBOL(page_to_pfn);
#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
-#if MAX_NUMNODES > 1
-/*
- * Find the highest possible node id.
- */
-int highest_possible_node_id(void)
-{
- unsigned int node;
- unsigned int highest = 0;
- for_each_node_mask(node, node_possible_map)
- highest = node;
- return highest;
-}
-EXPORT_SYMBOL(highest_possible_node_id);
-#endif