mm/sparse: don't panic if the allocation in sparse_buffer_init fails
index 8cb68a5..22291db 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
 #include <linux/sched/signal.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -727,7 +730,7 @@ retry:
                 * If we have a pending SIGKILL, don't keep faulting pages and
                 * potentially allocating memory.
                 */
-               if (unlikely(fatal_signal_pending(current))) {
+               if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        goto out;
                }
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
+
 #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       long i;
+       struct vm_area_struct *vma_prev = NULL;
+
+       for (i = 0; i < nr_pages; i++) {
+               struct vm_area_struct *vma = vmas[i];
+
+               if (vma == vma_prev)
+                       continue;
+
+               vma_prev = vma;
+
+               if (vma_is_fsdax(vma))
+                       return true;
+       }
+       return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+       /*
+        * We want to make sure we allocate the new page from the same node
+        * as the source page.
+        */
+       int nid = page_to_nid(page);
+       /*
+        * Trying to allocate a page for migration. Ignore allocation
+        * failure warnings. We don't force __GFP_THISNODE here because
+        * this node is where the CMA reservation lives, and in some cases
+        * such nodes have very little non-movable memory available.
+        */
+       gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+       if (PageHighMem(page))
+               gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+       if (PageHuge(page)) {
+               struct hstate *h = page_hstate(page);
+               /*
+                * We don't want to dequeue from the pool because pool pages will
+                * mostly be from the CMA region.
+                */
+               return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+       }
+#endif
+       if (PageTransHuge(page)) {
+               struct page *thp;
+               /*
+                * ignore allocation failure warnings
+                */
+               gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+               /*
+                * Remove the movable mask so that we don't allocate from the
+                * CMA area again.
+                */
+               thp_gfpmask &= ~__GFP_MOVABLE;
+               thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+               if (!thp)
+                       return NULL;
+               prep_transhuge_page(thp);
+               return thp;
+       }
+
+       return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                       unsigned int gup_flags,
+                                       struct page **pages,
+                                       struct vm_area_struct **vmas)
+{
+       long i;
+       bool drain_allow = true;
+       bool migrate_allow = true;
+       LIST_HEAD(cma_page_list);
+
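+       /*
+        * Walk the pinned pages looking for CMA pages; isolate them onto
+        * cma_page_list and migrate them out of CMA, then re-pin and
+        * re-check until no CMA pages are found or a migration pass fails.
+        */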
+check_again:
+       for (i = 0; i < nr_pages; i++) {
+               /*
+                * If we get a page from the CMA zone, since we are going to
+                * be pinning these entries, we might as well move them out
+                * of the CMA zone if possible.
+                */
+               if (is_migrate_cma_page(pages[i])) {
+
+                       struct page *head = compound_head(pages[i]);
+
+                       if (PageHuge(head)) {
+                               isolate_huge_page(head, &cma_page_list);
+                       } else {
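+                               /*
+                                * The page may still sit on a per-CPU LRU
+                                * pagevec; drain once so that isolate_lru_page()
+                                * below can find it on the LRU.
+                                */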
+                               if (!PageLRU(head) && drain_allow) {
+                                       lru_add_drain_all();
+                                       drain_allow = false;
+                               }
+
+                               if (!isolate_lru_page(head)) {
+                                       list_add_tail(&head->lru, &cma_page_list);
+                                       mod_node_page_state(page_pgdat(head),
+                                                           NR_ISOLATED_ANON +
+                                                           page_is_file_cache(head),
+                                                           hpage_nr_pages(head));
+                               }
+                       }
+               }
+       }
+
+       if (!list_empty(&cma_page_list)) {
+               /*
+                * drop the above get_user_pages reference.
+                */
+               for (i = 0; i < nr_pages; i++)
+                       put_page(pages[i]);
+
+               if (migrate_pages(&cma_page_list, new_non_cma_page,
+                                 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+                       /*
+                        * some of the pages failed migration. Do get_user_pages
+                        * without migration.
+                        */
+                       migrate_allow = false;
+
+                       if (!list_empty(&cma_page_list))
+                               putback_movable_pages(&cma_page_list);
+               }
+               /*
+                * We migrated the pages. Try to get the page references
+                * again, migrating any new CMA pages which we failed to
+                * isolate earlier.
+                */
+               nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+               if ((nr_pages > 0) && migrate_allow) {
+                       drain_allow = true;
+                       goto check_again;
+               }
+       }
+
+       return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                              unsigned int gup_flags,
+                                              struct page **pages,
+                                              struct vm_area_struct **vmas)
+{
+       return nr_pages;
+}
+#endif
+
 /*
  * This is the same as get_user_pages() in that it assumes we are
  * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-               unsigned int gup_flags, struct page **pages,
-               struct vm_area_struct **vmas_arg)
+                            unsigned int gup_flags, struct page **pages,
+                            struct vm_area_struct **vmas_arg)
 {
        struct vm_area_struct **vmas = vmas_arg;
-       struct vm_area_struct *vma_prev = NULL;
+       unsigned long flags;
        long rc, i;
 
        if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                        return -ENOMEM;
        }
 
+       flags = memalloc_nocma_save();
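+       /*
+        * With PF_MEMALLOC_NOCMA set, pages faulted in by get_user_pages()
+        * below are not allocated from the CMA areas.
+        */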
        rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+       memalloc_nocma_restore(flags);
+       if (rc < 0)
+               goto out;
 
-       for (i = 0; i < rc; i++) {
-               struct vm_area_struct *vma = vmas[i];
-
-               if (vma == vma_prev)
-                       continue;
-
-               vma_prev = vma;
-
-               if (vma_is_fsdax(vma))
-                       break;
-       }
-
-       /*
-        * Either get_user_pages() failed, or the vma validation
-        * succeeded, in either case we don't need to put_page() before
-        * returning.
-        */
-       if (i >= rc)
+       if (check_dax_vmas(vmas, rc)) {
+               for (i = 0; i < rc; i++)
+                       put_page(pages[i]);
+               rc = -EOPNOTSUPP;
                goto out;
+       }
 
-       for (i = 0; i < rc; i++)
-               put_page(pages[i]);
-       rc = -EOPNOTSUPP;
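+       /*
+        * Long-term pins in the CMA area would block future contiguous
+        * allocations, so try to migrate any CMA pages out before
+        * returning them to the caller.
+        */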
+       rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
 out:
        if (vmas != vmas_arg)
                kfree(vmas);
@@ -1674,7 +1826,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                if (!pmd_present(pmd))
                        return 0;
 
-               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+                            pmd_devmap(pmd))) {
                        /*
                         * NUMA hinting faults need to be handled in the GUP
                         * slowpath for accounting purposes and so that they
@@ -1813,8 +1966,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
 
-       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-                                       (void __user *)start, len)))
+       if (unlikely(!access_ok((void __user *)start, len)))
                return 0;
 
        /*
@@ -1868,8 +2020,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        if (nr_pages <= 0)
                return 0;
 
-       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-                                       (void __user *)start, len)))
+       if (unlikely(!access_ok((void __user *)start, len)))
                return -EFAULT;
 
        if (gup_fast_permitted(start, nr_pages, write)) {