Merge git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/v4l-dvb
[powerpc.git] / fs / xfs / xfs_inode.c
index 41a0c73..3449480 100644 (file)
 #include "xfs_quota.h"
 #include "xfs_acl.h"
 #include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_chashlist_zone;
+kmem_zone_t *xfs_icluster_zone;
 
 /*
  * Used in xfs_itruncate().  This is the maximum number of extents
@@ -863,6 +864,7 @@ xfs_iread(
        ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
        ip->i_ino = ino;
        ip->i_mount = mp;
+       atomic_set(&ip->i_iocount, 0);
        spin_lock_init(&ip->i_flags_lock);
 
        /*
@@ -882,6 +884,9 @@ xfs_iread(
         * Initialize inode's trace buffers.
         * Do this before xfs_iformat in case it adds entries.
         */
+#ifdef XFS_VNODE_TRACE
+       ip->i_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
+#endif
 #ifdef XFS_BMAP_TRACE
        ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
 #endif
@@ -1153,7 +1158,7 @@ xfs_ialloc(
        if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
                xfs_bump_ino_vers2(tp, ip);
 
-       if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
+       if (pip && XFS_INHERIT_GID(pip)) {
                ip->i_d.di_gid = pip->i_d.di_gid;
                if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
                        ip->i_d.di_mode |= S_ISGID;
@@ -1267,7 +1272,7 @@ xfs_ialloc(
        xfs_trans_log_inode(tp, ip, flags);
 
        /* now that we have an i_mode we can setup inode ops and unlock */
-       bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
+       xfs_initialize_vnode(tp->t_mountp, vp, ip);
 
        *ipp = ip;
        return 0;
@@ -1454,8 +1459,10 @@ xfs_itruncate_start(
        mp = ip->i_mount;
        vp = XFS_ITOV(ip);
 
-       vn_iowait(vp);  /* wait for the completion of any pending DIOs */
-       
+       /* wait for the completion of any pending DIOs */
+       if (new_size < ip->i_size)
+               vn_iowait(ip);
+
        /*
         * Call toss_pages or flushinval_pages to get rid of pages
         * overlapping the region being removed.  We have to use
@@ -1489,9 +1496,11 @@ xfs_itruncate_start(
                         last_byte);
        if (last_byte > toss_start) {
                if (flags & XFS_ITRUNC_DEFINITE) {
-                       bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
+                       xfs_tosspages(ip, toss_start,
+                                       -1, FI_REMAPF_LOCKED);
                } else {
-                       error = bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
+                       error = xfs_flushinval_pages(ip, toss_start,
+                                       -1, FI_REMAPF_LOCKED);
                }
        }
 
@@ -1924,9 +1933,9 @@ xfs_iunlink(
         */
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
                                   XFS_FSS_TO_BB(mp, 1), 0, &agibp);
-       if (error) {
+       if (error)
                return error;
-       }
+
        /*
         * Validate the magic number of the agi block.
         */
@@ -1950,6 +1959,24 @@ xfs_iunlink(
        ASSERT(agi->agi_unlinked[bucket_index]);
        ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
 
+       error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+       if (error)
+               return error;
+
+       /*
+        * Clear the on-disk di_nlink. This is to prevent xfs_bulkstat
+        * from picking up this inode when it is reclaimed (its incore state
+        * initialized but not flushed to disk yet). The in-core di_nlink is
+        * already cleared in xfs_droplink() and a corresponding transaction
+        * logged. The hack here just synchronizes the in-core to on-disk
+        * di_nlink value in advance before the actual inode sync to disk.
+        * This is OK because the inode is already unlinked and would never
+        * change its di_nlink again for this inode generation.
+        * This is a temporary hack that would require a proper fix
+        * in the future.
+        */
+       dip->di_core.di_nlink = 0;
+
        if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
                /*
                 * There is already another inode in the bucket we need
@@ -1957,10 +1984,6 @@ xfs_iunlink(
                 * Here we put the head pointer into our next pointer,
                 * and then we fall through to point the head at us.
                 */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
-               if (error) {
-                       return error;
-               }
                ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
                /* both on-disk, don't endian flip twice */
                dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
@@ -2182,10 +2205,10 @@ xfs_ifree_cluster(
        int                     i, j, found, pre_flushed;
        xfs_daddr_t             blkno;
        xfs_buf_t               *bp;
-       xfs_ihash_t             *ih;
        xfs_inode_t             *ip, **ip_found;
        xfs_inode_log_item_t    *iip;
        xfs_log_item_t          *lip;
+       xfs_perag_t             *pag = xfs_get_perag(mp, inum);
        SPLDECL(s);
 
        if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
@@ -2220,23 +2243,20 @@ xfs_ifree_cluster(
                 */
                found = 0;
                for (i = 0; i < ninodes; i++) {
-                       ih = XFS_IHASH(mp, inum + i);
-                       read_lock(&ih->ih_lock);
-                       for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
-                               if (ip->i_ino == inum + i)
-                                       break;
-                       }
+                       read_lock(&pag->pag_ici_lock);
+                       ip = radix_tree_lookup(&pag->pag_ici_root,
+                                       XFS_INO_TO_AGINO(mp, (inum + i)));
 
                        /* Inode not in memory or we found it already,
                         * nothing to do
                         */
                        if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
 
                        if (xfs_inode_clean(ip)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
 
@@ -2259,7 +2279,7 @@ xfs_ifree_cluster(
                                                ip_found[found++] = ip;
                                        }
                                }
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
 
@@ -2277,8 +2297,7 @@ xfs_ifree_cluster(
                                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                }
                        }
-
-                       read_unlock(&ih->ih_lock);
+                       read_unlock(&pag->pag_ici_lock);
                }
 
                bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 
@@ -2333,6 +2352,7 @@ xfs_ifree_cluster(
        }
 
        kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+       xfs_put_perag(mp, pag);
 }
 
 /*
@@ -2728,6 +2748,10 @@ xfs_idestroy(
        mrfree(&ip->i_lock);
        mrfree(&ip->i_iolock);
        freesema(&ip->i_flock);
+
+#ifdef XFS_VNODE_TRACE
+       ktrace_free(ip->i_trace);
+#endif
 #ifdef XFS_BMAP_TRACE
        ktrace_free(ip->i_xtrace);
 #endif
@@ -3050,12 +3074,11 @@ xfs_iflush(
        xfs_mount_t             *mp;
        int                     error;
        /* REFERENCED */
-       xfs_chash_t             *ch;
        xfs_inode_t             *iq;
        int                     clcount;        /* count of inodes clustered */
        int                     bufwasdelwri;
+       struct hlist_node       *entry;
        enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
-       SPLDECL(s);
 
        XFS_STATS_INC(xs_iflush_count);
 
@@ -3169,14 +3192,14 @@ xfs_iflush(
         * inode clustering:
         * see if other inodes can be gathered into this write
         */
-
-       ip->i_chash->chl_buf = bp;
-
-       ch = XFS_CHASH(mp, ip->i_blkno);
-       s = mutex_spinlock(&ch->ch_lock);
+       spin_lock(&ip->i_cluster->icl_lock);
+       ip->i_cluster->icl_buf = bp;
 
        clcount = 0;
-       for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) {
+       hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
+               if (iq == ip)
+                       continue;
+
                /*
                 * Do an un-protected check to see if the inode is dirty and
                 * is a candidate for flushing.  These checks will be repeated
@@ -3227,7 +3250,7 @@ xfs_iflush(
                        xfs_iunlock(iq, XFS_ILOCK_SHARED);
                }
        }
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
 
        if (clcount) {
                XFS_STATS_INC(xs_icluster_flushcnt);
@@ -3264,7 +3287,7 @@ cluster_corrupt_out:
        /* Corruption detected in the clustering loop.  Invalidate the
         * inode buffer and shut down the filesystem.
         */
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
 
        /*
         * Clean up the buffer.  If it was B_DELWRI, just release it --