[GFS2] Fix warning on impossible event in eattr code
[powerpc.git] / fs / gfs2 / dir.c
index f31f163..6918a58 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -8,59 +8,59 @@
  */
 
 /*
-* Implements Extendible Hashing as described in:
-*   "Extendible Hashing" by Fagin, et al in
-*     __ACM Trans. on Database Systems__, Sept 1979.
-*
-*
-* Here's the layout of dirents which is essentially the same as that of ext2
-* within a single block. The field de_name_len is the number of bytes
-* actually required for the name (no null terminator). The field de_rec_len
-* is the number of bytes allocated to the dirent. The offset of the next
-* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
-* deleted, the preceding dirent inherits its allocated space, ie
-* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
-* by adding de_rec_len to the current dirent, this essentially causes the
-* deleted dirent to get jumped over when iterating through all the dirents.
-*
-* When deleting the first dirent in a block, there is no previous dirent so
-* the field de_ino is set to zero to designate it as deleted. When allocating
-* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
-* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
-* dirent is allocated. Otherwise it must go through all the 'used' dirents
-* searching for one in which the amount of total space minus the amount of
-* used space will provide enough space for the new dirent.
-*
-* There are two types of blocks in which dirents reside. In a stuffed dinode,
-* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
-* the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
-* beginning of the leaf block. The dirents reside in leaves when
-*
-* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
-*
-* Otherwise, the dirents are "linear", within a single stuffed dinode block.
-*
-* When the dirents are in leaves, the actual contents of the directory file are
-* used as an array of 64-bit block pointers pointing to the leaf blocks. The
-* dirents are NOT in the directory file itself. There can be more than one block
-* pointer in the array that points to the same leaf. In fact, when a directory
-* is first converted from linear to exhash, all of the pointers point to the
-* same leaf.
-*
-* When a leaf is completely full, the size of the hash table can be
-* doubled unless it is already at the maximum size which is hard coded into
-* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
-* but never before the maximum hash table size has been reached.
-*/
+ * Implements Extendible Hashing as described in:
+ *   "Extendible Hashing" by Fagin, et al in
+ *     __ACM Trans. on Database Systems__, Sept 1979.
+ *
+ *
+ * Here's the layout of dirents which is essentially the same as that of ext2
+ * within a single block. The field de_name_len is the number of bytes
+ * actually required for the name (no null terminator). The field de_rec_len
+ * is the number of bytes allocated to the dirent. The offset of the next
+ * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
+ * deleted, the preceding dirent inherits its allocated space, i.e.
+ * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
+ * by adding de_rec_len to the current dirent, this essentially causes the
+ * deleted dirent to get jumped over when iterating through all the dirents.
+ *
+ * When deleting the first dirent in a block, there is no previous dirent so
+ * the field de_inum.no_addr is set to zero to designate it as deleted. When
+ * allocating a dirent, gfs2_dirent_alloc iterates through the dirents in a
+ * block. If the first dirent has (de_inum.no_addr == 0) and de_rec_len is
+ * large enough, this first dirent is allocated. Otherwise it must go through
+ * all the 'used' dirents searching for one in which the amount of total space
+ * minus the amount of used space will provide enough space for the new dirent.
+ *
+ * There are two types of blocks in which dirents reside. In a stuffed dinode,
+ * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
+ * the block.  In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
+ * beginning of the leaf block. The dirents reside in leaves when
+ *
+ * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
+ *
+ * Otherwise, the dirents are "linear", within a single stuffed dinode block.
+ *
+ * When the dirents are in leaves, the actual contents of the directory file are
+ * used as an array of 64-bit block pointers pointing to the leaf blocks. The
+ * dirents are NOT in the directory file itself. There can be more than one
+ * block pointer in the array that points to the same leaf. In fact, when a
+ * directory is first converted from linear to exhash, all of the pointers
+ * point to the same leaf.
+ *
+ * When a leaf is completely full, the size of the hash table can be
+ * doubled unless it is already at the maximum size which is hard coded into
+ * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
+ * but never before the maximum hash table size has been reached.
+ */
 
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/sort.h>
 #include <linux/gfs2_ondisk.h>
-#include <asm/semaphore.h>
+#include <linux/crc32.h>
+#include <linux/vmalloc.h>
 
 #include "gfs2.h"
 #include "lm_interface.h"
@@ -90,33 +90,36 @@ typedef int (*leaf_call_t) (struct gfs2_inode *dip,
                            uint32_t index, uint32_t len, uint64_t leaf_no,
                            void *data);
 
-int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
-                        struct buffer_head **bhp)
+/* Set up a new GFS2_METATYPE_JD buffer for @block in *bhp; always returns 0. */
+int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, uint64_t block,
+                           struct buffer_head **bhp)
 {
        struct buffer_head *bh;
-       int error = 0;
-
-       if (new) {
-               bh = gfs2_meta_new(ip->i_gl, block);
-               gfs2_trans_add_bh(ip->i_gl, bh, 1);
-               gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
-               gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
-       } else {
-               error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT,
-                                      &bh);
-               if (error)
-                       return error;
-               if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
-                       brelse(bh);
-                       return -EIO;
-               }
-       }
 
+       bh = gfs2_meta_new(ip->i_gl, block); /* NOTE(review): result is not NULL-checked */
+       gfs2_trans_add_bh(ip->i_gl, bh, 1);
+       gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
+       gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); /* presumably clears all past the header */
        *bhp = bh;
        return 0;
 }
 
+static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, uint64_t block,
+                                       struct buffer_head **bhp)
+{
+       struct buffer_head *bh;
+       int error; /* 0, the gfs2_meta_read() error, or -EIO on a type mismatch */
 
+       error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT, &bh);
+       if (error)
+               return error; /* *bhp is left untouched on failure */
+       if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
+               brelse(bh); /* not a GFS2_METATYPE_JD block: drop our reference */
+               return -EIO;
+       }
+       *bhp = bh; /* caller now holds the buffer reference */
+       return 0;
+}
 
 static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
                                  unsigned int offset, unsigned int size)
@@ -194,8 +197,8 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 
                if (!extlen) {
                        new = 1;
-                       error = gfs2_block_map(ip, lblock, &new, &dblock,
-                                              &extlen);
+                       error = gfs2_extent_map(ip->i_vnode, lblock, &new,
+                                               &dblock, &extlen);
                        if (error)
                                goto fail;
                        error = -EIO;
@@ -203,9 +206,11 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
                                goto fail;
                }
 
-               error = gfs2_dir_get_buffer(ip, dblock,
-                                           (amount == sdp->sd_jbsize) ?
-                                           1 : new, &bh);
+               if (amount == sdp->sd_jbsize || new)
+                       error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
+               else
+                       error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
+
                if (error)
                        goto fail;
 
@@ -309,8 +314,8 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
 
                if (!extlen) {
                        new = 0;
-                       error = gfs2_block_map(ip, lblock, &new, &dblock,
-                                              &extlen);
+                       error = gfs2_extent_map(ip->i_vnode, lblock, &new,
+                                               &dblock, &extlen);
                        if (error)
                                goto fail;
                }
@@ -319,7 +324,10 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
                        gfs2_meta_ra(ip->i_gl, dblock, extlen);
 
                if (dblock) {
-                       error = gfs2_dir_get_buffer(ip, dblock, new, &bh);
+                       if (new)
+                               error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
+                       else
+                               error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
                        if (error)
                                goto fail;
                        dblock++;
@@ -344,7 +352,8 @@ fail:
 }
 
 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
-                           const struct qstr *name);
+                           const struct qstr *name,
+                           void *opaque); /* opaque: caller-private state for the callback */
 
 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
                                     const struct qstr *name, int ret)
@@ -358,13 +367,15 @@ static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
 }
 
 static int gfs2_dirent_find(const struct gfs2_dirent *dent,
-                           const struct qstr *name)
+                           const struct qstr *name,
+                           void *opaque) /* unused; keeps the gfs2_dscan_t signature */
 {
        return __gfs2_dirent_find(dent, name, 1);
 }
 
 static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
-                           const struct qstr *name)
+                           const struct qstr *name,
+                           void *opaque) /* unused; keeps the gfs2_dscan_t signature */
 {
        return __gfs2_dirent_find(dent, name, 2);
 }
@@ -374,7 +385,8 @@ static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
  * name->len holds size of block.
  */
 static int gfs2_dirent_last(const struct gfs2_dirent *dent,
-                           const struct qstr *name)
+                           const struct qstr *name,
+                           void *opaque)
 {
        const char *start = name->name;
        const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
@@ -384,17 +396,36 @@ static int gfs2_dirent_last(const struct gfs2_dirent *dent,
 }
 
 static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
-                                 const struct qstr *name)
+                                 const struct qstr *name,
+                                 void *opaque) /* unused; keeps the gfs2_dscan_t signature */
 {
        unsigned required = GFS2_DIRENT_SIZE(name->len);
        unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
        unsigned totlen = be16_to_cpu(dent->de_rec_len);
 
+       if (!dent->de_inum.no_addr) /* no_addr == 0: dirent is unused */
+               actual = GFS2_DIRENT_SIZE(0); /* so count it as holding a zero-length name */
        if ((totlen - actual) >= required)
                return 1;
        return 0;
 }
 
+struct dirent_gather { /* state handed to gfs2_dirent_gather() via its opaque argument */
+       const struct gfs2_dirent **pdent; /* array that receives dirent pointers */
+       unsigned offset; /* index of the next slot to fill in pdent */
+};
+
+static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
+                             const struct qstr *name,
+                             void *opaque)
+{
+       struct dirent_gather *g = opaque; /* opaque must point at a struct dirent_gather */
+       if (dent->de_inum.no_addr) { /* record only dirents with a non-zero no_addr */
+               g->pdent[g->offset++] = dent; /* NOTE(review): no bounds check on offset */
+       }
+       return 0; /* always 0, so this callback never reports a match */
+}
+
 /*
  * Other possible things to check:
  * - Inode located within filesystem size (and on valid block)
@@ -431,21 +462,14 @@ error:
        return -EIO;
 }
 
-static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
-                                           void *buf,
-                                           unsigned int len, gfs2_dscan_t scan,
-                                           const struct qstr *name)
+static int gfs2_dirent_offset(const void *buf)
 {
-       struct gfs2_meta_header *h = buf;
-       struct gfs2_dirent *dent, *prev;
-       unsigned offset;
-       unsigned size;
-       int ret = 0;
+       const struct gfs2_meta_header *h = buf;
+       int offset;
 
        BUG_ON(buf == NULL);
-       BUG_ON(name == NULL);
 
-       switch(be16_to_cpu(h->mh_type)) {
+       switch(be32_to_cpu(h->mh_type)) {
        case GFS2_METATYPE_LF:
                offset = sizeof(struct gfs2_leaf);
                break;
@@ -455,14 +479,36 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
        default:
                goto wrong_type;
        }
+       return offset;
+wrong_type:
+       printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
+              be32_to_cpu(h->mh_type));
+       return -1;
+}
 
+static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
+                                           void *buf,
+                                           unsigned int len, gfs2_dscan_t scan,
+                                           const struct qstr *name,
+                                           void *opaque)
+{
+       struct gfs2_dirent *dent, *prev;
+       unsigned offset;
+       unsigned size;
+       int ret = 0;
+
+       ret = gfs2_dirent_offset(buf);
+       if (ret < 0)
+               goto consist_inode;
+
+       offset = ret;
        prev = NULL;
        dent = (struct gfs2_dirent *)(buf + offset);
        size = be16_to_cpu(dent->de_rec_len);
        if (gfs2_check_dirent(dent, offset, size, len, 1))
                goto consist_inode;
        do {
-               ret = scan(dent, name);
+               ret = scan(dent, name, opaque);
                if (ret)
                        break;
                offset += size;
@@ -487,9 +533,6 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
                return ERR_PTR(ret);
        }
 
-wrong_type:
-       printk(KERN_WARNING "gfs2_scan_dirent: %p wrong block type %u\n", scan,
-              be16_to_cpu(h->mh_type));
 consist_inode:
        gfs2_consist_inode(inode->u.generic_ip);
        return ERR_PTR(-EIO);
@@ -512,7 +555,7 @@ static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
 {
        struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
 
-       if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
+       if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
                if (gfs2_meta_check(dip->i_sbd, bh))
                        return -EIO;
                *dent = (struct gfs2_dirent *)(bh->b_data +
@@ -626,10 +669,10 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
  * Takes a dent from which to grab space as an argument. Returns the
  * newly created dent.
  */
-struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
-                                    struct gfs2_dirent *dent,
-                                    const struct qstr *name,
-                                    struct buffer_head *bh)
+static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
+                                           struct gfs2_dirent *dent,
+                                           const struct qstr *name,
+                                           struct buffer_head *bh)
 {
        struct gfs2_inode *ip = inode->u.generic_ip;
        struct gfs2_dirent *ndent;
@@ -651,7 +694,8 @@ static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
                                             const struct qstr *name)
 {
        struct gfs2_dirent *dent;
-       dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_find_space, name);
+       dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 
+                               gfs2_dirent_find_space, name, NULL);
        if (!dent || IS_ERR(dent))
                return dent;
        return gfs2_init_dirent(inode, dent, name, bh);
@@ -734,7 +778,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
                        return ERR_PTR(error);
                do {
                        dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
-                                               scan, name);
+                                               scan, name, NULL);
                        if (dent)
                                goto got_dent;
                        leaf = (struct gfs2_leaf *)bh->b_data;
@@ -751,8 +795,12 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
        error = gfs2_meta_inode_buffer(ip, &bh);
        if (error)
                return ERR_PTR(error);
-       dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name);
+       dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
 got_dent:
+       if (unlikely(dent == NULL || IS_ERR(dent))) {
+               brelse(bh);
+               bh = NULL;
+       }
        *pbh = bh;
        return dent;
 }
@@ -764,6 +812,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
        struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
        struct gfs2_leaf *leaf;
        struct gfs2_dirent *dent;
+       struct qstr name = { .name = "", .len = 0, .hash = 0 };
        if (!bh)
                return NULL;
        gfs2_trans_add_bh(ip->i_gl, bh, 1);
@@ -775,12 +824,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
        leaf->lf_next = cpu_to_be64(0);
        memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
        dent = (struct gfs2_dirent *)(leaf+1);
-       dent->de_inum.no_formal_ino = cpu_to_be64(0);
-       dent->de_inum.no_addr = cpu_to_be64(0);
-       dent->de_hash = cpu_to_be32(0);
-       dent->de_rec_len = cpu_to_be16(bh->b_size - sizeof(struct gfs2_leaf));
-       dent->de_name_len = cpu_to_be16(0);
-       dent->de_type = cpu_to_be16(0);
+       gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
        *pbh = bh;
        return leaf;
 }
@@ -831,7 +875,7 @@ static int dir_make_exhash(struct inode *inode)
                   sizeof(struct gfs2_leaf);
        args.name = bh->b_data;
        dent = gfs2_dirent_scan(dip->i_vnode, bh->b_data, bh->b_size,
-                               gfs2_dirent_last, &args);
+                               gfs2_dirent_last, &args, NULL);
        if (!dent) {
                brelse(bh);
                brelse(dibh);
@@ -907,10 +951,15 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
        /*  Get the old leaf block  */
        error = get_leaf(dip, leaf_no, &obh);
        if (error)
-               goto fail;
+               return error;
 
-       gfs2_trans_add_bh(dip->i_gl, obh, 1);
        oleaf = (struct gfs2_leaf *)obh->b_data;
+       if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
+               brelse(obh);
+               return 1; /* can't split */
+       }
+
+       gfs2_trans_add_bh(dip->i_gl, obh, 1);
 
        nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
        if (!nleaf) {
@@ -923,6 +972,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
        len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
        half_len = len >> 1;
        if (!half_len) {
+               printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
                gfs2_consist_inode(dip);
                error = -EIO;
                goto fail_brelse;
@@ -939,7 +989,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
                lp[x] = cpu_to_be64(bn);
 
        error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(uint64_t),
-                                    half_len * sizeof(uint64_t));
+                                   half_len * sizeof(uint64_t));
        if (error != half_len * sizeof(uint64_t)) {
                if (error >= 0)
                        error = -EIO;
@@ -965,7 +1015,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
                        str.name = (char*)(dent+1);
                        str.len = be16_to_cpu(dent->de_name_len);
                        str.hash = be32_to_cpu(dent->de_hash);
-                       new = gfs2_dirent_alloc(dip->i_vnode, nbh, &str);
+                       new = gfs2_dirent_alloc(inode, nbh, &str);
                        if (IS_ERR(new)) {
                                error = PTR_ERR(new);
                                break;
@@ -1005,13 +1055,11 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 
        return error;
 
- fail_lpfree:
+fail_lpfree:
        kfree(lp);
 
- fail_brelse:
+fail_brelse:
        brelse(obh);
-
- fail:
        brelse(nbh);
        return error;
 }
@@ -1154,10 +1202,10 @@ static int compare_dents(const void *a, const void *b)
 
 static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
                           void *opaque, gfs2_filldir_t filldir,
-                          struct gfs2_dirent **darr, uint32_t entries,
+                          const struct gfs2_dirent **darr, uint32_t entries,
                           int *copied)
 {
-       struct gfs2_dirent *dent, *dent_next;
+       const struct gfs2_dirent *dent, *dent_next;
        struct gfs2_inum inum;
        uint64_t off, off_next;
        unsigned int x, y;
@@ -1216,189 +1264,74 @@ static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
        return 0;
 }
 
-/**
- * do_filldir_single - Read directory entries out of a single block
- * @dip: The GFS2 inode
- * @offset: The offset in the file to read from
- * @opaque: opaque data to pass to filldir
- * @filldir: The function to pass entries to
- * @bh: the block
- * @entries: the number of entries in the block
- * @copied: pointer to int that's non-zero if a entry has been copied out
- *
- * Returns: errno, >0 on exception from filldir
- */
-
-static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
-                            void *opaque, gfs2_filldir_t filldir,
-                            struct buffer_head *bh, uint32_t entries,
-                            int *copied)
+static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
+                             gfs2_filldir_t filldir, int *copied,
+                             unsigned *depth, u64 leaf_no)
 {
-       struct gfs2_dirent **darr;
-       struct gfs2_dirent *de;
-       unsigned int e = 0;
-       int error;
-
-       if (!entries)
-               return 0;
-
-       darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
-       if (!darr)
-               return -ENOMEM;
+       struct gfs2_inode *ip = inode->u.generic_ip;
+       struct buffer_head *bh;
+       struct gfs2_leaf *lf;
+       unsigned entries = 0; /* sum of lf_entries over the whole chain */
+       unsigned leaves = 0; /* number of leaf blocks in the chain */
+       const struct gfs2_dirent **darr, *dent;
+       struct dirent_gather g;
+       struct buffer_head **larr; /* leaf buffers held until out_kfree */
+       int leaf = 0; /* number of buffers stored in larr so far */
+       int error, i;
+       u64 lfn = leaf_no; /* walks the lf_next chain starting at leaf_no */
 
-       dirent_first(dip, bh, &de);
        do {
-               if (!de->de_inum.no_addr)
-                       continue;
-               if (e >= entries) {
-                       gfs2_consist_inode(dip);
-                       error = -EIO;
-                       goto out;
-               }
-               darr[e++] = de;
-       } while (dirent_next(dip, bh, &de) == 0);
-
-       if (e != entries) {
-               gfs2_consist_inode(dip);
-               error = -EIO;
-               goto out;
-       }
-
-       error = do_filldir_main(dip, offset, opaque, filldir, darr,
-                               entries, copied);
-
- out:
-       kfree(darr);
-
-       return error;
-}
-
-/**
- * do_filldir_multi - Read directory entries out of a linked leaf list
- * @dip: The GFS2 inode
- * @offset: The offset in the file to read from
- * @opaque: opaque data to pass to filldir
- * @filldir: The function to pass entries to
- * @bh: the first leaf in the list
- * @copied: pointer to int that's non-zero if a entry has been copied out
- *
- * Returns: errno, >0 on exception from filldir
- */
-
-static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
-                           void *opaque, gfs2_filldir_t filldir,
-                           struct buffer_head *bh, int *copied)
-{
-       struct buffer_head **larr = NULL;
-       struct gfs2_dirent **darr;
-       struct gfs2_leaf *leaf;
-       struct buffer_head *tmp_bh;
-       struct gfs2_dirent *de;
-       unsigned int entries, e = 0;
-       unsigned int leaves = 0, l = 0;
-       unsigned int x;
-       uint64_t ln;
-       int error = 0;
-
-       /*  Count leaves and entries  */
-
-       leaf = (struct gfs2_leaf *)bh->b_data;
-       entries = be16_to_cpu(leaf->lf_entries);
-       ln = be64_to_cpu(leaf->lf_next);
-
-       while (ln) {
-               error = get_leaf(dip, ln, &tmp_bh);
+               error = get_leaf(ip, lfn, &bh);
                if (error)
-                       return error;
-
-               leaf = (struct gfs2_leaf *)tmp_bh->b_data;
-               if (leaf->lf_entries) {
-                       entries += be16_to_cpu(leaf->lf_entries);
-                       leaves++;
-               }
-               ln = be64_to_cpu(leaf->lf_next);
-
-               brelse(tmp_bh);
-       }
+                       goto out;
+               lf = (struct gfs2_leaf *)bh->b_data;
+               if (leaves == 0)
+                       *depth = be16_to_cpu(lf->lf_depth); /* depth of the first leaf */
+               entries += be16_to_cpu(lf->lf_entries);
+               leaves++;
+               lfn = be64_to_cpu(lf->lf_next);
+               brelse(bh); /* first pass only counts; buffers are re-read below */
+       } while(lfn);
 
        if (!entries)
                return 0;
 
-       if (leaves) {
-               larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
-               if (!larr)
-                       return -ENOMEM;
-       }
-
-       darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
-       if (!darr) {
-               kfree(larr);
-               return -ENOMEM;
-       }
-
-       leaf = (struct gfs2_leaf *)bh->b_data;
-       if (leaf->lf_entries) {
-               dirent_first(dip, bh, &de);
-               do {
-                       if (!de->de_inum.no_addr)
-                               continue;
-                       if (e >= entries) {
-                               gfs2_consist_inode(dip);
-                               error = -EIO;
-                               goto out;
-                       }
-                       darr[e++] = de;
-               } while (dirent_next(dip, bh, &de) == 0);
-       }
-       ln = be64_to_cpu(leaf->lf_next);
+       error = -ENOMEM;
+       larr = vmalloc((leaves + entries) * sizeof(void*)); /* larr[leaves] then darr[entries] */
+       if (!larr)
+               goto out;
+       darr = (const struct gfs2_dirent **)(larr + leaves);
+       g.pdent = darr;
+       g.offset = 0;
+       lfn = leaf_no;
 
-       while (ln) {
-               error = get_leaf(dip, ln, &tmp_bh);
+       do {
+               error = get_leaf(ip, lfn, &bh);
                if (error)
-                       goto out;
-
-               leaf = (struct gfs2_leaf *)tmp_bh->b_data;
-               if (leaf->lf_entries) {
-                       dirent_first(dip, tmp_bh, &de);
-                       do {
-                               if (!de->de_inum.no_addr)
-                                       continue;
-                               if (e >= entries) {
-                                       gfs2_consist_inode(dip);
-                                       error = -EIO;
-                                       goto out;
-                               }
-                               darr[e++] = de;
-                       } while (dirent_next(dip, tmp_bh, &de) == 0);
-
-                       larr[l++] = tmp_bh;
-
-                       ln = be64_to_cpu(leaf->lf_next);
+                       goto out_kfree;
+               lf = (struct gfs2_leaf *)bh->b_data;
+               lfn = be64_to_cpu(lf->lf_next);
+               if (lf->lf_entries) {
+                       larr[leaf++] = bh; /* store first so out_kfree releases bh on error */
+                       dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
+                                               gfs2_dirent_gather, NULL, &g);
+                       error = PTR_ERR(dent);
+                       if (IS_ERR(dent)) {
+                               goto out_kfree;
+                       }
+                       error = 0;
                } else {
-                       ln = be64_to_cpu(leaf->lf_next);
-                       brelse(tmp_bh);
+                       brelse(bh);
                }
-       }
-
-       if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
-               error = -EIO;
-               goto out;
-       }
-       if (e != entries) {
-               gfs2_consist_inode(dip);
-               error = -EIO;
-               goto out;
-       }
+       } while(lfn);
 
-       error = do_filldir_main(dip, offset, opaque, filldir, darr,
+       error = do_filldir_main(ip, offset, opaque, filldir, darr,
                                entries, copied);
-
- out:
-       kfree(darr);
-       for (x = 0; x < l; x++)
-               brelse(larr[x]);
-       kfree(larr);
-
+out_kfree:
+       for(i = 0; i < leaf; i++)
+               brelse(larr[i]); /* darr entries point into these buffers */
+       vfree(larr);
+out:
        return error;
 }
 
@@ -1412,18 +1345,18 @@ static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
  * Returns: errno
  */
 
-static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
+static int dir_e_read(struct inode *inode, uint64_t *offset, void *opaque,
                      gfs2_filldir_t filldir)
 {
+       struct gfs2_inode *dip = inode->u.generic_ip;
        struct gfs2_sbd *sdp = dip->i_sbd;
-       struct buffer_head *bh;
-       struct gfs2_leaf *leaf;
-       uint32_t hsize, len;
+       uint32_t hsize, len = 0;
        uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
        uint32_t hash, index;
        uint64_t *lp;
        int copied = 0;
        int error = 0;
+       unsigned depth;
 
        hsize = 1 << dip->i_di.di_depth;
        if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
@@ -1454,61 +1387,66 @@ static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
                        ht_offset_cur = ht_offset;
                }
 
-               error = get_leaf(dip, be64_to_cpu(lp[lp_offset]), &bh);
+               error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
+                                          &copied, &depth,
+                                          be64_to_cpu(lp[lp_offset]));
                if (error)
-                       goto out;
-
-               leaf = (struct gfs2_leaf *)bh->b_data;
-               if (leaf->lf_next)
-                       error = do_filldir_multi(dip, offset, opaque, filldir,
-                                                bh, &copied);
-               else
-                       error = do_filldir_single(dip, offset, opaque, filldir,
-                                                 bh, 
-                                                 be16_to_cpu(leaf->lf_entries),
-                                                 &copied);
-
-               brelse(bh);
-
-               if (error) {
-                       if (error > 0)
-                               error = 0;
-                       goto out;
-               }
+                       break;
 
-               len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
+               len = 1 << (dip->i_di.di_depth - depth);
                index = (index & ~(len - 1)) + len;
        }
 
- out:
+out:
        kfree(lp);
-
+       if (error > 0)
+               error = 0;
        return error;
 }
 
-static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
-                     gfs2_filldir_t filldir)
+int gfs2_dir_read(struct inode *inode, uint64_t *offset, void *opaque,
+                 gfs2_filldir_t filldir)
 {
+       struct gfs2_inode *dip = inode->u.generic_ip;
+       struct dirent_gather g;
+       const struct gfs2_dirent **darr, *dent;
        struct buffer_head *dibh;
        int copied = 0;
        int error;
 
+       if (!dip->i_di.di_entries)
+               return 0;
+
+       if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
+               return dir_e_read(inode, offset, opaque, filldir);
+
        if (!gfs2_is_stuffed(dip)) {
                gfs2_consist_inode(dip);
                return -EIO;
        }
 
-       if (!dip->i_di.di_entries)
-               return 0;
-
        error = gfs2_meta_inode_buffer(dip, &dibh);
        if (error)
                return error;
 
-       error = do_filldir_single(dip, offset,
-                                 opaque, filldir,
-                                 dibh, dip->i_di.di_entries,
-                                 &copied);
+       error = -ENOMEM;
+       darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
+                      GFP_KERNEL);
+       if (darr) {
+               g.pdent = darr;
+               g.offset = 0;
+               dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
+                                       gfs2_dirent_gather, NULL, &g);
+               if (IS_ERR(dent)) {
+                       error = PTR_ERR(dent);
+                       goto out;
+               }
+               error = do_filldir_main(dip, offset, opaque, filldir, darr,
+                                       dip->i_di.di_entries, &copied);
+out:
+               kfree(darr);
+       }
+
        if (error > 0)
                error = 0;
 
@@ -1580,7 +1518,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
                brelse(obh);
                return -ENOSPC;
        }
-       oleaf->lf_next = cpu_to_be64(bn);
+       oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
        brelse(bh);
        brelse(obh);
 
@@ -1647,16 +1585,17 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                error = dir_split_leaf(inode, name);
                if (error == 0)
                        continue;
-               if (error != -ENOSPC)
+               if (error < 0)
                        break;
                if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
                        error = dir_double_exhash(ip);
                        if (error)
                                break;
                        error = dir_split_leaf(inode, name);
-                       if (error)
+                       if (error < 0)
                                break;
-                       continue;
+                       if (error == 0)
+                               continue;
                }
                error = dir_new_leaf(inode, name);
                if (!error)
@@ -1694,7 +1633,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
                return PTR_ERR(dent);
        }
        /* If not first in block, adjust pointers accordingly */
-       if (gfs2_dirent_find(dent, name) == 0) {
+       if (gfs2_dirent_find(dent, name, NULL) == 0) {
                prev = dent;
                dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
        }
@@ -1706,8 +1645,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
                if (!entries)
                        gfs2_consist_inode(dip);
                leaf->lf_entries = cpu_to_be16(--entries);
-               brelse(bh);
        }
+       brelse(bh);
 
        error = gfs2_meta_inode_buffer(dip, &bh);
        if (error)
@@ -1724,19 +1663,6 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
        return error;
 }
 
-int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
-                 gfs2_filldir_t filldir)
-{
-       int error;
-
-       if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
-               error = dir_e_read(dip, offset, opaque, filldir);
-       else
-               error = dir_l_read(dip, offset, opaque, filldir);
-
-       return error;
-}
-
 /**
  * gfs2_dir_mvino - Change inode number of directory entry
  * @dip: The GFS2 inode
@@ -2031,15 +1957,15 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
  * Returns: 1 if alloc required, 0 if not, -ve on error
  */
 
-int gfs2_diradd_alloc_required(struct inode *inode,
-                              const struct qstr *name)
+int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
 {
        struct gfs2_dirent *dent;
        struct buffer_head *bh;
 
        dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
-       if (!dent)
+       if (!dent) {
                return 1;
+       }
        if (IS_ERR(dent))
                return PTR_ERR(dent);
        brelse(bh);