mm/sparse: don't panic if the allocation in sparse_buffer_init fails
index 8cb68a5..22291db 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
 #include <linux/sched/signal.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -727,7 +730,7 @@ retry:
                 * If we have a pending SIGKILL, don't keep faulting pages and
                 * potentially allocating memory.
                 */
-               if (unlikely(fatal_signal_pending(current))) {
+               if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        goto out;
                }
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
+
 #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       long i;
+       struct vm_area_struct *vma_prev = NULL;
+
+       for (i = 0; i < nr_pages; i++) {
+               struct vm_area_struct *vma = vmas[i];
+
+               if (vma == vma_prev)
+                       continue;
+
+               vma_prev = vma;
+
+               if (vma_is_fsdax(vma))
+                       return true;
+       }
+       return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+       /*
+        * We want to make sure we allocate the new page from the same node
+        * as the source page.
+        */
+       int nid = page_to_nid(page);
+       /*
+        * Trying to allocate a page for migration. Ignore allocation
+        * failure warnings. We don't force __GFP_THISNODE here because
+        * this node is where the CMA reservation lives, and in some cases
+        * such nodes have very little non-movable memory available.
+        */
+       gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+       if (PageHighMem(page))
+               gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+       if (PageHuge(page)) {
+               struct hstate *h = page_hstate(page);
+               /*
+                * We don't want to dequeue from the pool because pool pages will
+                * mostly be from the CMA region.
+                */
+               return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+       }
+#endif
+       if (PageTransHuge(page)) {
+               struct page *thp;
+               /*
+                * ignore allocation failure warnings
+                */
+               gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+               /*
+                * Remove the movable mask so that we don't allocate from the
+                * CMA area again.
+                */
+               thp_gfpmask &= ~__GFP_MOVABLE;
+               thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+               if (!thp)
+                       return NULL;
+               prep_transhuge_page(thp);
+               return thp;
+       }
+
+       return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                       unsigned int gup_flags,
+                                       struct page **pages,
+                                       struct vm_area_struct **vmas)
+{
+       long i;
+       bool drain_allow = true;
+       bool migrate_allow = true;
+       LIST_HEAD(cma_page_list);
+
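+       /*
+        * Walk the pinned pages looking for CMA pages; isolate them onto
+        * cma_page_list and migrate them out of CMA, then re-pin and
+        * re-check until no CMA pages are found or a migration pass fails.
+        */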
+check_again:
+       for (i = 0; i < nr_pages; i++) {
+               /*
+                * If we get a page from the CMA zone, since we are going to
+                * be pinning these entries, we might as well move them out
+                * of the CMA zone if possible.
+                */
+               if (is_migrate_cma_page(pages[i])) {
+
+                       struct page *head = compound_head(pages[i]);
+
+                       if (PageHuge(head)) {
+                               isolate_huge_page(head, &cma_page_list);
+                       } else {
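+                               /*
+                                * The page may still sit on a per-CPU LRU
+                                * pagevec; drain once so that isolate_lru_page()
+                                * below can find it on the LRU.
+                                */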
+                               if (!PageLRU(head) && drain_allow) {
+                                       lru_add_drain_all();
+                                       drain_allow = false;
+                               }
+
+                               if (!isolate_lru_page(head)) {
+                                       list_add_tail(&head->lru, &cma_page_list);
+                                       mod_node_page_state(page_pgdat(head),
+                                                           NR_ISOLATED_ANON +
+                                                           page_is_file_cache(head),
+                                                           hpage_nr_pages(head));
+                               }
+                       }
+               }
+       }
+
+       if (!list_empty(&cma_page_list)) {
+               /*
+                * drop the above get_user_pages reference.
+                */
+               for (i = 0; i < nr_pages; i++)
+                       put_page(pages[i]);
+
+               if (migrate_pages(&cma_page_list, new_non_cma_page,
+                                 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+                       /*
+                        * some of the pages failed migration. Do get_user_pages
+                        * without migration.
+                        */
+                       migrate_allow = false;
+
+                       if (!list_empty(&cma_page_list))
+                               putback_movable_pages(&cma_page_list);
+               }
+               /*
+                * We migrated the pages. Try to get the page references
+                * again, migrating any new CMA pages which we failed to
+                * isolate earlier.
+                */
+               nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+               if ((nr_pages > 0) && migrate_allow) {
+                       drain_allow = true;
+                       goto check_again;
+               }
+       }
+
+       return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                              unsigned int gup_flags,
+                                              struct page **pages,
+                                              struct vm_area_struct **vmas)
+{
+       return nr_pages;
+}
+#endif
+
 /*
  * This is the same as get_user_pages() in that it assumes we are
  * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-               unsigned int gup_flags, struct page **pages,
-               struct vm_area_struct **vmas_arg)
+                            unsigned int gup_flags, struct page **pages,
+                            struct vm_area_struct **vmas_arg)
 {
        struct vm_area_struct **vmas = vmas_arg;
-       struct vm_area_struct *vma_prev = NULL;
+       unsigned long flags;
        long rc, i;
 
        if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                        return -ENOMEM;
        }
 
+       flags = memalloc_nocma_save();
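+       /*
+        * With PF_MEMALLOC_NOCMA set, pages faulted in by get_user_pages()
+        * below are not allocated from the CMA areas.
+        */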
        rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+       memalloc_nocma_restore(flags);
+       if (rc < 0)
+               goto out;
 
-       for (i = 0; i < rc; i++) {
-               struct vm_area_struct *vma = vmas[i];
-
-               if (vma == vma_prev)
-                       continue;
-
-               vma_prev = vma;
-
-               if (vma_is_fsdax(vma))
-                       break;
-       }
-
-       /*
-        * Either get_user_pages() failed, or the vma validation
-        * succeeded, in either case we don't need to put_page() before
-        * returning.
-        */
-       if (i >= rc)
+       if (check_dax_vmas(vmas, rc)) {
+               for (i = 0; i < rc; i++)
+                       put_page(pages[i]);
+               rc = -EOPNOTSUPP;
                goto out;
+       }
 
-       for (i = 0; i < rc; i++)
-               put_page(pages[i]);
-       rc = -EOPNOTSUPP;
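+       /*
+        * Long-term pins in the CMA area would block future contiguous
+        * allocations, so try to migrate any CMA pages out before
+        * returning them to the caller.
+        */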
+       rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
 out:
        if (vmas != vmas_arg)
                kfree(vmas);
@@ -1674,7 +1826,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                if (!pmd_present(pmd))
                        return 0;
 
-               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+               if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+                            pmd_devmap(pmd))) {
                        /*
                         * NUMA hinting faults need to be handled in the GUP
                         * slowpath for accounting purposes and so that they
@@ -1813,8 +1966,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
 
-       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-                                       (void __user *)start, len)))
+       if (unlikely(!access_ok((void __user *)start, len)))
                return 0;
 
        /*
@@ -1868,8 +2020,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        if (nr_pages <= 0)
                return 0;
 
-       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-                                       (void __user *)start, len)))
+       if (unlikely(!access_ok((void __user *)start, len)))
                return -EFAULT;
 
        if (gup_fast_permitted(start, nr_pages, write)) {