Merge tag 'f2fs-for-4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
[linux] / fs / f2fs / node.c
index a7f6603..d332275 100644 (file)
@@ -138,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
        return dst_page;
 }
 
+static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
+{
+       struct nat_entry *new;
+       /* Allocate a zeroed nat_entry for @nid from nat_entry_slab; nothing is linked into the NAT cache here. */
+       if (no_fail) /* NOTE(review): f2fs_kmem_cache_alloc() presumably never returns NULL - confirm */
+               new = f2fs_kmem_cache_alloc(nat_entry_slab,
+                                               GFP_NOFS | __GFP_ZERO);
+       else /* plain slab allocation; the result may be NULL */
+               new = kmem_cache_alloc(nat_entry_slab,
+                                               GFP_NOFS | __GFP_ZERO);
+       if (new) { /* __GFP_ZERO already cleared the entry; only the nid and flags are set here */
+               nat_set_nid(new, nid);
+               nat_reset_flag(new);
+       }
+       return new; /* NULL when the allocation failed */
+}
+
+static void __free_nat_entry(struct nat_entry *e)
+{
+       kmem_cache_free(nat_entry_slab, e); /* hand @e back to nat_entry_slab; it is not unlinked from any tree or list here */
+}
+
+/* Link @ne into the NAT cache of @nm_i; the caller must hold nm_i->nat_tree_lock. Returns @ne, or NULL if the insert fails. */
+static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
+       struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
+{
+       if (no_fail) /* NOTE(review): f2fs_radix_tree_insert() presumably cannot fail - confirm */
+               f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
+       else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
+               return NULL; /* insert failed: @ne was not linked and is not freed here */
+
+       if (raw_ne) /* optional: fill ne->ni from the raw NAT entry */
+               node_info_from_raw_nat(&ne->ni, raw_ne);
+       list_add_tail(&ne->list, &nm_i->nat_entries);
+       nm_i->nat_cnt++; /* count the newly linked entry */
+       return ne; /* same pointer as passed in, now in nat_root and on nat_entries */
+}
+
 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
 {
        return radix_tree_lookup(&nm_i->nat_root, n);
@@ -154,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
        list_del(&e->list);
        radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
        nm_i->nat_cnt--;
-       kmem_cache_free(nat_entry_slab, e);
+       __free_nat_entry(e);
 }
 
 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -250,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
        return need_update;
 }
 
-static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
-                                                               bool no_fail)
-{
-       struct nat_entry *new;
-
-       if (no_fail) {
-               new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
-               f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
-       } else {
-               new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
-               if (!new)
-                       return NULL;
-               if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
-                       kmem_cache_free(nat_entry_slab, new);
-                       return NULL;
-               }
-       }
-
-       memset(new, 0, sizeof(struct nat_entry));
-       nat_set_nid(new, nid);
-       nat_reset_flag(new);
-       list_add_tail(&new->list, &nm_i->nat_entries);
-       nm_i->nat_cnt++;
-       return new;
-}
-
+/* Cache raw NAT entry @ne for @nid. Takes nm_i->nat_tree_lock for writing itself, so the caller must not hold it. */
 static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
                                                struct f2fs_nat_entry *ne)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct nat_entry *e;
+       struct nat_entry *new, *e;
+
+       new = __alloc_nat_entry(nid, false); /* allocate before taking the lock */
+       if (!new)
+               return; /* allocation failed: return without caching anything */
 
+       down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
-       if (!e) {
-               e = grab_nat_entry(nm_i, nid, false);
-               if (e)
-                       node_info_from_raw_nat(&e->ni, ne);
-       } else {
+       if (!e)
+               e = __init_nat_entry(nm_i, new, ne, false); /* NULL if the radix-tree insert fails */
+       else /* already cached: the cached ino, block address and version must match @ne */
                f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
                                nat_get_blkaddr(e) !=
                                        le32_to_cpu(ne->block_addr) ||
                                nat_get_version(e) != ne->version);
-       }
+       up_write(&nm_i->nat_tree_lock);
+       if (e != new) /* @new was not linked (entry already cached, or insert failed) */
+               __free_nat_entry(new);
 }
 
 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -300,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *e;
+       struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
 
        down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ni->nid);
        if (!e) {
-               e = grab_nat_entry(nm_i, ni->nid, true);
+               e = __init_nat_entry(nm_i, new, NULL, true);
                copy_node_info(&e->ni, ni);
                f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
        } else if (new_blkaddr == NEW_ADDR) {
@@ -316,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                copy_node_info(&e->ni, ni);
                f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
        }
+       /* let's free early to reduce memory consumption */
+       if (e != new)
+               __free_nat_entry(new);
 
        /* sanity check */
        f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
@@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        f2fs_put_page(page, 1);
 cache:
        /* cache nat entry */
-       down_write(&nm_i->nat_tree_lock);
        cache_nat_entry(sbi, nid, &ne);
-       up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1245,21 +1265,17 @@ iput_out:
 
 static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       pgoff_t index, end;
+       pgoff_t index;
        struct pagevec pvec;
        struct page *last_page = NULL;
+       int nr_pages;
 
-       pagevec_init(&pvec, 0);
+       pagevec_init(&pvec);
        index = 0;
-       end = ULONG_MAX;
-
-       while (index <= end) {
-               int i, nr_pages;
-               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
-                               PAGECACHE_TAG_DIRTY,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-               if (nr_pages == 0)
-                       break;
+
+       while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY))) {
+               int i;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -1425,13 +1441,14 @@ static int f2fs_write_node_page(struct page *page,
 int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        struct writeback_control *wbc, bool atomic)
 {
-       pgoff_t index, end;
+       pgoff_t index;
        pgoff_t last_idx = ULONG_MAX;
        struct pagevec pvec;
        int ret = 0;
        struct page *last_page = NULL;
        bool marked = false;
        nid_t ino = inode->i_ino;
+       int nr_pages;
 
        if (atomic) {
                last_page = last_fsync_dnode(sbi, ino);
@@ -1439,17 +1456,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        return PTR_ERR_OR_ZERO(last_page);
        }
 retry:
-       pagevec_init(&pvec, 0);
+       pagevec_init(&pvec);
        index = 0;
-       end = ULONG_MAX;
-
-       while (index <= end) {
-               int i, nr_pages;
-               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
-                               PAGECACHE_TAG_DIRTY,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-               if (nr_pages == 0)
-                       break;
+
+       while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY))) {
+               int i;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -1548,25 +1560,21 @@ out:
 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
                                bool do_balance, enum iostat_type io_type)
 {
-       pgoff_t index, end;
+       pgoff_t index;
        struct pagevec pvec;
        int step = 0;
        int nwritten = 0;
        int ret = 0;
+       int nr_pages;
 
-       pagevec_init(&pvec, 0);
+       pagevec_init(&pvec);
 
 next_step:
        index = 0;
-       end = ULONG_MAX;
-
-       while (index <= end) {
-               int i, nr_pages;
-               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
-                               PAGECACHE_TAG_DIRTY,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-               if (nr_pages == 0)
-                       break;
+
+       while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY))) {
+               int i;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -1655,27 +1663,20 @@ out:
 
 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       pgoff_t index = 0, end = ULONG_MAX;
+       pgoff_t index = 0;
        struct pagevec pvec;
        int ret2, ret = 0;
+       int nr_pages;
 
-       pagevec_init(&pvec, 0);
+       pagevec_init(&pvec);
 
-       while (index <= end) {
-               int i, nr_pages;
-               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
-                               PAGECACHE_TAG_WRITEBACK,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-               if (nr_pages == 0)
-                       break;
+       while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_WRITEBACK))) {
+               int i;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 
-                       /* until radix tree lookup accepts end_index */
-                       if (unlikely(page->index > end))
-                               continue;
-
                        if (ino && ino_of_node(page) == ino) {
                                f2fs_wait_on_page_writeback(page, NODE, true);
                                if (TestClearPageError(page))
@@ -1906,15 +1907,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
        if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
                return;
 
-       if (set)
+       if (set) {
+               if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+                       return;
                __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-       else
-               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-
-       if (set)
                nm_i->free_nid_count[nat_ofs]++;
-       else if (!build)
-               nm_i->free_nid_count[nat_ofs]--;
+       } else {
+               if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+                       return;
+               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               if (!build)
+                       nm_i->free_nid_count[nat_ofs]--;
+       }
 }
 
 static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1949,12 +1953,32 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
        }
 }
 
-static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+static void scan_curseg_cache(struct f2fs_sb_info *sbi)
 {
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
        struct f2fs_journal *journal = curseg->journal;
+       int i;
+       /* Walk every NAT entry in the CURSEG_HOT_DATA journal and add or remove its nid as a free nid. */
+       down_read(&curseg->journal_rwsem); /* held for reading across the whole walk */
+       for (i = 0; i < nats_in_cursum(journal); i++) {
+               block_t addr;
+               nid_t nid;
+
+               addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+               nid = le32_to_cpu(nid_in_journal(journal, i));
+               if (addr == NULL_ADDR) /* NULL_ADDR block address: pass the nid to add_free_nid() */
+                       add_free_nid(sbi, nid, true);
+               else /* any other address: pass the nid to remove_free_nid() */
+                       remove_free_nid(sbi, nid);
+       }
+       up_read(&curseg->journal_rwsem);
+}
+
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned int i, idx;
+       nid_t nid;
 
        down_read(&nm_i->nat_tree_lock);
 
@@ -1964,10 +1988,10 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
                if (!nm_i->free_nid_count[i])
                        continue;
                for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
-                       nid_t nid;
-
-                       if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
-                               continue;
+                       idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
+                                               NAT_ENTRY_PER_BLOCK, idx);
+                       if (idx >= NAT_ENTRY_PER_BLOCK)
+                               break;
 
                        nid = i * NAT_ENTRY_PER_BLOCK + idx;
                        add_free_nid(sbi, nid, true);
@@ -1977,27 +2001,14 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
                }
        }
 out:
-       down_read(&curseg->journal_rwsem);
-       for (i = 0; i < nats_in_cursum(journal); i++) {
-               block_t addr;
-               nid_t nid;
+       scan_curseg_cache(sbi);
 
-               addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
-               nid = le32_to_cpu(nid_in_journal(journal, i));
-               if (addr == NULL_ADDR)
-                       add_free_nid(sbi, nid, true);
-               else
-                       remove_free_nid(sbi, nid);
-       }
-       up_read(&curseg->journal_rwsem);
        up_read(&nm_i->nat_tree_lock);
 }
 
 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
-       struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
-       struct f2fs_journal *journal = curseg->journal;
        int i = 0;
        nid_t nid = nm_i->next_scan_nid;
 
@@ -2015,7 +2026,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
                /* try to find free nids in free_nid_bitmap */
                scan_free_nid_bits(sbi);
 
-               if (nm_i->nid_cnt[FREE_NID])
+               if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
                        return;
        }
 
@@ -2043,18 +2054,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
        nm_i->next_scan_nid = nid;
 
        /* find free nids from current sum_pages */
-       down_read(&curseg->journal_rwsem);
-       for (i = 0; i < nats_in_cursum(journal); i++) {
-               block_t addr;
+       scan_curseg_cache(sbi);
 
-               addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
-               nid = le32_to_cpu(nid_in_journal(journal, i));
-               if (addr == NULL_ADDR)
-                       add_free_nid(sbi, nid, true);
-               else
-                       remove_free_nid(sbi, nid);
-       }
-       up_read(&curseg->journal_rwsem);
        up_read(&nm_i->nat_tree_lock);
 
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -2374,8 +2375,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 
                ne = __lookup_nat_cache(nm_i, nid);
                if (!ne) {
-                       ne = grab_nat_entry(nm_i, nid, true);
-                       node_info_from_raw_nat(&ne->ni, &raw_ne);
+                       ne = __alloc_nat_entry(nid, true);
+                       __init_nat_entry(nm_i, ne, &raw_ne, true);
                }
 
                /*
@@ -2626,7 +2627,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
                __set_bit_le(i, nm_i->nat_block_bitmap);
 
                nid = i * NAT_ENTRY_PER_BLOCK;
-               last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+               last_nid = nid + NAT_ENTRY_PER_BLOCK;
 
                spin_lock(&NM_I(sbi)->nid_list_lock);
                for (; nid < last_nid; nid++)