X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=fs%2Fext4%2Fialloc.c;h=7b5cfa62b66307ab08d65fdaa140753583eb50ae;hb=2aa9fc4c405467f6afbbb2162ff8afaced47d99b;hp=427f83066a0da425b75757c68430768bba7428ea;hpb=e8b495fe09bc793ae26774e7b2667f7f658d56e2;p=powerpc.git diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 427f83066a..7b5cfa62b6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -28,6 +28,7 @@ #include "xattr.h" #include "acl.h" +#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -43,6 +44,52 @@ * the free blocks count in the block. */ +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext4_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* Initializes an uninitialized inode bitmap */ +unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, + ext4_group_t block_group, + struct ext4_group_desc *gdp) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks and inodes use to prevent + * allocation, essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", + block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + + return EXT4_INODES_PER_GROUP(sb); +} /* * Read the inode allocation bitmap for a given block_group, reading @@ -51,7 +98,7 @@ * Return buffer_head of bitmap on success or NULL. */ static struct buffer_head * -read_inode_bitmap(struct super_block * sb, unsigned long block_group) +read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; struct buffer_head *bh = NULL; @@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_inode_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + } if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " @@ -93,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) unsigned long ino; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; - unsigned long block_group; + ext4_group_t block_group; unsigned long bit; struct ext4_group_desc * gdp; struct ext4_super_block * es; @@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) if (is_directory) gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); + gdp->bg_checksum = ext4_group_desc_csum(sbi, + block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -199,19 +260,20 @@ error_return: * For other inodes, search forward from the parent directory\'s block * group to find a free inode. */ -static int find_group_dir(struct super_block *sb, struct inode *parent) +static int find_group_dir(struct super_block *sb, struct inode *parent, + ext4_group_t *best_group) { - int ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; unsigned int freei, avefreei; struct ext4_group_desc *desc, *best_desc = NULL; - struct buffer_head *bh; - int group, best_group = -1; + ext4_group_t group; + int ret = -1; freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); avefreei = freei / ngroups; for (group = 0; group < ngroups; group++) { - desc = ext4_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) @@ -219,11 +281,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) if (!best_desc || (le16_to_cpu(desc->bg_free_blocks_count) > le16_to_cpu(best_desc->bg_free_blocks_count))) { - best_group = group; + *best_group = group; best_desc = desc; + ret = 0; } } - return best_group; + return ret; } /* @@ -254,12 +317,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) #define INODE_COST 64 #define BLOCK_COST 256 -static int find_group_orlov(struct super_block *sb, struct inode *parent) +static int find_group_orlov(struct super_block *sb, struct inode *parent, + ext4_group_t *group) { - int parent_group = EXT4_I(parent)->i_block_group; + ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int ngroups = sbi->s_groups_count; + ext4_group_t ngroups = sbi->s_groups_count; int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; @@ -267,9 +331,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) unsigned int ndirs; int max_debt, max_dirs, min_inodes; ext4_grpblk_t min_blocks; - int group = -1, i; + ext4_group_t i; struct ext4_group_desc *desc; - struct buffer_head *bh; freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; @@ -281,13 +344,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) if ((parent == sb->s_root->d_inode) || (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { int best_ndir = inodes_per_group; - int best_group = -1; + ext4_group_t grp; + int ret = -1; - get_random_bytes(&group, sizeof(group)); - parent_group = (unsigned)group % ngroups; + get_random_bytes(&grp, sizeof(grp)); + parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); + grp = (parent_group + i) % ngroups; + desc = ext4_get_group_desc(sb, grp, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) @@ -296,11 +360,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) continue; if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) continue; - best_group = group; + *group = grp; + ret = 0; best_ndir = le16_to_cpu(desc->bg_used_dirs_count); } - if (best_group >= 0) - return best_group; + if (ret == 0) + return ret; goto fallback; } @@ -321,8 +386,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) max_debt = 1; for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); + *group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc(sb, *group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) @@ -331,17 +396,16 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) continue; if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) continue; - return group; + return 0; } fallback: for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) - return group; + *group = (parent_group + i) % ngroups; + desc = ext4_get_group_desc(sb, *group, NULL); + if (desc && desc->bg_free_inodes_count && + le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + return 0; } if (avefreei) { @@ -356,22 +420,22 @@ fallback: return -1; } -static int find_group_other(struct super_block *sb, struct inode *parent) +static int find_group_other(struct super_block *sb, struct inode *parent, + ext4_group_t *group) { - int parent_group = EXT4_I(parent)->i_block_group; - int ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t parent_group = EXT4_I(parent)->i_block_group; + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *desc; - struct buffer_head *bh; - int group, i; + ext4_group_t i; /* * Try to place the inode in its parent directory */ - group = parent_group; - desc = ext4_get_group_desc (sb, group, &bh); + *group = parent_group; + desc = ext4_get_group_desc(sb, *group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) - return group; + return 0; /* * We're going to place this inode in a different blockgroup from its @@ -382,33 +446,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent) * * So add our directory's i_ino into the starting point for the hash. */ - group = (group + parent->i_ino) % ngroups; + *group = (*group + parent->i_ino) % ngroups; /* * Use a quadratic hash to find a group with a free inode and some free * blocks. */ for (i = 1; i < ngroups; i <<= 1) { - group += i; - if (group >= ngroups) - group -= ngroups; - desc = ext4_get_group_desc (sb, group, &bh); + *group += i; + if (*group >= ngroups) + *group -= ngroups; + desc = ext4_get_group_desc(sb, *group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) - return group; + return 0; } /* * That failed: try linear search for a free inode, even if that group * has no free blocks. */ - group = parent_group; + *group = parent_group; for (i = 0; i < ngroups; i++) { - if (++group >= ngroups) - group = 0; - desc = ext4_get_group_desc (sb, group, &bh); + if (++*group >= ngroups) + *group = 0; + desc = ext4_get_group_desc(sb, *group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count)) - return group; + return 0; } return -1; @@ -429,16 +493,17 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; - int group; + ext4_group_t group = 0; unsigned long ino = 0; struct inode * inode; struct ext4_group_desc * gdp = NULL; struct ext4_super_block * es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int err = 0; + int ret2, err = 0; struct inode *ret; - int i; + ext4_group_t i; + int free = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -454,14 +519,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) es = sbi->s_es; if (S_ISDIR(mode)) { if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); + ret2 = find_group_dir(sb, dir, &group); else - group = find_group_orlov(sb, dir); + ret2 = find_group_orlov(sb, dir, &group); } else - group = find_group_other(sb, dir); + ret2 = find_group_other(sb, dir, &group); err = -ENOSPC; - if (group == -1) + if (ret2 == -1) goto out; for (i = 0; i < sbi->s_groups_count; i++) { @@ -520,11 +585,13 @@ repeat_in_this_group: goto out; got: - ino += group * EXT4_INODES_PER_GROUP(sb) + 1; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); + ino++; + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino > EXT4_INODES_PER_GROUP(sb)) { + ext4_error(sb, __FUNCTION__, + "reserved inode or inode > inodes count - " + "block_group = %lu, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; } @@ -532,13 +599,78 @@ got: BUFFER_TRACE(bh2, "get_write_access"); err = ext4_journal_get_write_access(handle, bh2); if (err) goto fail; + + /* We may have to initialize the block bitmap if it isn't already */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + struct buffer_head *block_bh = read_block_bitmap(sb, group); + + BUFFER_TRACE(block_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bh); + if (err) { + brelse(block_bh); + goto fail; + } + + free = 0; + spin_lock(sb_bgl_lock(sbi, group)); + /* recheck and clear flag under lock if we still need to */ + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + free = ext4_free_blocks_after_init(sb, group, gdp); + gdp->bg_free_blocks_count = cpu_to_le16(free); + } + spin_unlock(sb_bgl_lock(sbi, group)); + + /* Don't need to dirty bitmap block if we didn't change it */ + if (free) { + BUFFER_TRACE(block_bh, "dirty block bitmap"); + err = ext4_journal_dirty_metadata(handle, block_bh); + } + + brelse(block_bh); + if (err) + goto fail; + } + spin_lock(sb_bgl_lock(sbi, group)); + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + + /* When marking the block group with + * ~EXT4_BG_INODE_UNINIT we don't want to depend + * on the value of bg_itable_unsed even though + * mke2fs could have initialized the same for us. + * Instead we calculated the value below + */ + + free = 0; + } else { + free = EXT4_INODES_PER_GROUP(sb) - + le16_to_cpu(gdp->bg_itable_unused); + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + * + */ + if (ino > free) + gdp->bg_itable_unused = + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + } + gdp->bg_free_inodes_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); if (S_ISDIR(mode)) { gdp->bg_used_dirs_count = cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); err = ext4_journal_dirty_metadata(handle, bh2); @@ -560,7 +692,7 @@ got: inode->i_gid = current->fsgid; inode->i_mode = mode; - inode->i_ino = ino; + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = @@ -576,11 +708,6 @@ got: /* dirsync only applies to directories */ if (!S_ISDIR(mode)) ei->i_flags &= ~EXT4_DIRSYNC_FL; -#ifdef EXT4_FRAGMENTS - ei->i_faddr = 0; - ei->i_frag_no = 0; - ei->i_frag_size = 0; -#endif ei->i_file_acl = 0; ei->i_dir_acl = 0; ei->i_dtime = 0; @@ -657,7 +784,7 @@ fail_drop: struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) { unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); - unsigned long block_group; + ext4_group_t block_group; int bit; struct buffer_head *bitmap_bh = NULL; struct inode *inode = NULL; @@ -713,7 +840,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) { unsigned long desc_count; struct ext4_group_desc *gdp; - int i; + ext4_group_t i; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; unsigned long bitmap_count, x; @@ -759,7 +886,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) unsigned long ext4_count_dirs (struct super_block * sb) { unsigned long count = 0; - int i; + ext4_group_t i; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);