X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=mm%2Fhugetlb.c;h=eb7180db303326f73f7e099f84e2557e84cc87b1;hb=5351fb106a84d6ac584c2501e3b335093d38a58c;hp=0ccc7f2302529b0e3a237c556f895ab4ddfb3911;hpb=1709775828de83b099554176b6d4971ebc772962;p=powerpc.git

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f2302..eb7180db30 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -44,14 +44,14 @@ static void clear_huge_page(struct page *page, unsigned long addr)
 }
 
 static void copy_huge_page(struct page *dst, struct page *src,
-			   unsigned long addr)
+			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 
 	might_sleep();
 	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
 		cond_resched();
-		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
+		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
 	}
 }
 
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
 	for (z = zonelist->zones; *z; z++) {
 		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
 	}
@@ -140,6 +140,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 
 fail:
+	if (vma->vm_flags & VM_MAYSHARE)
+		resv_huge_pages++;
 	spin_unlock(&hugetlb_lock);
 	return NULL;
 }
@@ -172,6 +174,17 @@ static int __init hugetlb_setup(char *s)
 }
 __setup("hugepages=", hugetlb_setup);
 
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 #ifdef CONFIG_SYSCTL
 static void update_and_free_page(struct page *page)
 {
@@ -389,6 +402,8 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			continue;
 
 		page = pte_page(pte);
+		if (pte_dirty(pte))
+			set_page_dirty(page);
 		list_add(&page->lru, &page_list);
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -442,7 +457,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	spin_unlock(&mm->page_table_lock);
-	copy_huge_page(new_page, old_page, address);
+	copy_huge_page(new_page, old_page, address, vma);
 	spin_lock(&mm->page_table_lock);
 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
 
@@ -815,6 +830,26 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	chg = region_chg(&inode->i_mapping->private_list, from, to);
 	if (chg < 0)
 		return chg;
+	/*
+	 * When cpuset is configured, it breaks the strict hugetlb page
+	 * reservation as the accounting is done on a global variable. Such
+	 * reservation is completely rubbish in the presence of cpuset because
+	 * the reservation is not checked against page availability for the
+	 * current cpuset. Application can still potentially OOM'ed by kernel
+	 * with lack of free htlb page in cpuset that the task is in.
+	 * Attempt to enforce strict accounting with cpuset is almost
+	 * impossible (or too ugly) because cpuset is too fluid that
+	 * task or memory node can be dynamically moved between cpusets.
+	 *
+	 * The change of semantics for shared hugetlb mapping with cpuset is
+	 * undesirable. However, in order to preserve some of the semantics,
+	 * we fall back to check against current free page availability as
+	 * a best attempt and hopefully to minimize the impact of changing
+	 * semantics that cpuset has.
+	 */
+	if (chg > cpuset_mems_nr(free_huge_pages_node))
+		return -ENOMEM;
+
 	ret = hugetlb_acct_memory(chg);
 	if (ret < 0)
 		return ret;