Merge git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/v4l-dvb

[powerpc.git] / fs / xfs / xfs_inode.c
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 41a0c73..3449480 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,10 +49,11 @@
  #include "xfs_quota.h"
  #include "xfs_acl.h"
  #include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
  
  kmem_zone_t *xfs_ifork_zone;
  kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_chashlist_zone;
+kmem_zone_t *xfs_icluster_zone;
  
  /*
   * Used in xfs_itruncate().  This is the maximum number of extents
@@ -863,6 +864,7 @@ xfs_iread(
         ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
         ip->i_ino = ino;
         ip->i_mount = mp;
+       atomic_set(&ip->i_iocount, 0);
         spin_lock_init(&ip->i_flags_lock);
  
         /*
@@ -882,6 +884,9 @@ xfs_iread(
          * Initialize inode's trace buffers.
          * Do this before xfs_iformat in case it adds entries.
          */
+#ifdef XFS_VNODE_TRACE
+       ip->i_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
+#endif
  #ifdef XFS_BMAP_TRACE
         ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
  #endif
@@ -1153,7 +1158,7 @@ xfs_ialloc(
         if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
                 xfs_bump_ino_vers2(tp, ip);
  
-       if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
+       if (pip && XFS_INHERIT_GID(pip)) {
                 ip->i_d.di_gid = pip->i_d.di_gid;
                 if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
                         ip->i_d.di_mode |= S_ISGID;
@@ -1267,7 +1272,7 @@ xfs_ialloc(
         xfs_trans_log_inode(tp, ip, flags);
  
         /* now that we have an i_mode we can setup inode ops and unlock */
-       bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
+       xfs_initialize_vnode(tp->t_mountp, vp, ip);
  
         *ipp = ip;
         return 0;
@@ -1454,8 +1459,10 @@ xfs_itruncate_start(
         mp = ip->i_mount;
         vp = XFS_ITOV(ip);
  
-       vn_iowait(vp);  /* wait for the completion of any pending DIOs */
-       
+       /* wait for the completion of any pending DIOs */
+       if (new_size < ip->i_size)
+               vn_iowait(ip);
+
         /*
          * Call toss_pages or flushinval_pages to get rid of pages
          * overlapping the region being removed.  We have to use
@@ -1489,9 +1496,11 @@ xfs_itruncate_start(
                          last_byte);
         if (last_byte > toss_start) {
                 if (flags & XFS_ITRUNC_DEFINITE) {
-                       bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
+                       xfs_tosspages(ip, toss_start,
+                                       -1, FI_REMAPF_LOCKED);
                 } else {
-                       error = bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED);
+                       error = xfs_flushinval_pages(ip, toss_start,
+                                       -1, FI_REMAPF_LOCKED);
                 }
         }
  
@@ -1924,9 +1933,9 @@ xfs_iunlink(
          */
         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
                                    XFS_FSS_TO_BB(mp, 1), 0, &agibp);
-       if (error) {
+       if (error)
                 return error;
-       }
+
         /*
          * Validate the magic number of the agi block.
          */
@@ -1950,6 +1959,24 @@ xfs_iunlink(
         ASSERT(agi->agi_unlinked[bucket_index]);
         ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
  
+       error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+       if (error)
+               return error;
+
+       /*
+        * Clear the on-disk di_nlink. This is to prevent xfs_bulkstat
+        * from picking up this inode when it is reclaimed (its incore state
+        * initialized but not flushed to disk yet). The in-core di_nlink is
+        * already cleared in xfs_droplink() and a corresponding transaction
+        * logged. The hack here just synchronizes the in-core to on-disk
+        * di_nlink value in advance before the actual inode sync to disk.
+        * This is OK because the inode is already unlinked and would never
+        * change its di_nlink again for this inode generation.
+        * This is a temporary hack that would require a proper fix
+        * in the future.
+        */
+       dip->di_core.di_nlink = 0;
+
         if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
                 /*
                  * There is already another inode in the bucket we need
@@ -1957,10 +1984,6 @@ xfs_iunlink(
                  * Here we put the head pointer into our next pointer,
                  * and then we fall through to point the head at us.
                  */
-               error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
-               if (error) {
-                       return error;
-               }
                 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
                 /* both on-disk, don't endian flip twice */
                 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
@@ -2182,10 +2205,10 @@ xfs_ifree_cluster(
         int                     i, j, found, pre_flushed;
         xfs_daddr_t             blkno;
         xfs_buf_t               *bp;
-       xfs_ihash_t             *ih;
         xfs_inode_t             *ip, **ip_found;
         xfs_inode_log_item_t    *iip;
         xfs_log_item_t          *lip;
+       xfs_perag_t             *pag = xfs_get_perag(mp, inum);
         SPLDECL(s);
  
         if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
@@ -2220,23 +2243,20 @@ xfs_ifree_cluster(
                  */
                 found = 0;
                 for (i = 0; i < ninodes; i++) {
-                       ih = XFS_IHASH(mp, inum + i);
-                       read_lock(&ih->ih_lock);
-                       for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
-                               if (ip->i_ino == inum + i)
-                                       break;
-                       }
+                       read_lock(&pag->pag_ici_lock);
+                       ip = radix_tree_lookup(&pag->pag_ici_root,
+                                       XFS_INO_TO_AGINO(mp, (inum + i)));
  
                         /* Inode not in memory or we found it already,
                          * nothing to do
                          */
                         if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
                         if (xfs_inode_clean(ip)) {
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
@@ -2259,7 +2279,7 @@ xfs_ifree_cluster(
                                                 ip_found[found++] = ip;
                                         }
                                 }
-                               read_unlock(&ih->ih_lock);
+                               read_unlock(&pag->pag_ici_lock);
                                 continue;
                         }
  
@@ -2277,8 +2297,7 @@ xfs_ifree_cluster(
                                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                 }
                         }
-
-                       read_unlock(&ih->ih_lock);
+                       read_unlock(&pag->pag_ici_lock);
                 }
  
                 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 
@@ -2333,6 +2352,7 @@ xfs_ifree_cluster(
         }
  
         kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+       xfs_put_perag(mp, pag);
  }
  
  /*
@@ -2728,6 +2748,10 @@ xfs_idestroy(
         mrfree(&ip->i_lock);
         mrfree(&ip->i_iolock);
         freesema(&ip->i_flock);
+
+#ifdef XFS_VNODE_TRACE
+       ktrace_free(ip->i_trace);
+#endif
  #ifdef XFS_BMAP_TRACE
         ktrace_free(ip->i_xtrace);
  #endif
@@ -3050,12 +3074,11 @@ xfs_iflush(
         xfs_mount_t             *mp;
         int                     error;
         /* REFERENCED */
-       xfs_chash_t             *ch;
         xfs_inode_t             *iq;
         int                     clcount;        /* count of inodes clustered */
         int                     bufwasdelwri;
+       struct hlist_node       *entry;
         enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
-       SPLDECL(s);
  
         XFS_STATS_INC(xs_iflush_count);
  
@@ -3169,14 +3192,14 @@ xfs_iflush(
          * inode clustering:
          * see if other inodes can be gathered into this write
          */
-
-       ip->i_chash->chl_buf = bp;
-
-       ch = XFS_CHASH(mp, ip->i_blkno);
-       s = mutex_spinlock(&ch->ch_lock);
+       spin_lock(&ip->i_cluster->icl_lock);
+       ip->i_cluster->icl_buf = bp;
  
         clcount = 0;
-       for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) {
+       hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
+               if (iq == ip)
+                       continue;
+
                 /*
                  * Do an un-protected check to see if the inode is dirty and
                  * is a candidate for flushing.  These checks will be repeated
@@ -3227,7 +3250,7 @@ xfs_iflush(
                         xfs_iunlock(iq, XFS_ILOCK_SHARED);
                 }
         }
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
  
         if (clcount) {
                 XFS_STATS_INC(xs_icluster_flushcnt);
@@ -3264,7 +3287,7 @@ cluster_corrupt_out:
         /* Corruption detected in the clustering loop.  Invalidate the
          * inode buffer and shut down the filesystem.
          */
-       mutex_spinunlock(&ch->ch_lock, s);
+       spin_unlock(&ip->i_cluster->icl_lock);
  
         /*
          * Clean up the buffer.  If it was B_DELWRI, just release it --