X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_vnodeops.c;h=26d96d1b25cd03ccb76a4038c2decd2ab7508e23;hb=d7ede1aa5dfff53e76dbabac5b8087341686f662;hp=7c1f74531463bb11cac411322a6366420c822921;hpb=2ecc26b87a2b3e8650d3c7fe3fc85a8c73d5560d;p=powerpc.git diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7c1f745314..26d96d1b25 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -15,6 +15,7 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -55,32 +56,14 @@ #include "xfs_log_priv.h" #include "xfs_mac.h" - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 - -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ STATIC int xfs_open( bhv_desc_t *bdp, cred_t *credp) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -98,6 +81,35 @@ xfs_open( return 0; } +STATIC int +xfs_close( + bhv_desc_t *bdp, + int flags, + lastclose_t lastclose, + cred_t *credp) +{ + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return XFS_ERROR(EIO); + + if (lastclose != L_TRUE || !VN_ISREG(vp)) + return 0; + + /* + * If we previously truncated this file and removed old data in + * the process, we want to initiate "early" writeout on the last + * close. This is an attempt to combat the notorious NULL files + * problem which is particularly noticable from a truncate down, + * buffered (re-)write (delalloc), followed by a crash. What we + * are effectively doing here is significantly reducing the time + * window where we'd otherwise be exposed to that problem. + */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); + return 0; +} /* * xfs_getattr @@ -105,13 +117,13 @@ xfs_open( STATIC int xfs_getattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { xfs_inode_t *ip; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -182,8 +194,7 @@ xfs_getattr( break; } - vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; - vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + vn_atime_to_timespec(vp, &vap->va_atime); vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; @@ -239,7 +250,7 @@ xfs_getattr( int xfs_setattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { @@ -253,7 +264,7 @@ xfs_setattr( uid_t uid=0, iuid=0; gid_t gid=0, igid=0; int timeflags = 0; - vnode_t *vp; + bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; @@ -336,7 +347,7 @@ xfs_setattr( code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, &udqp, &gdqp); if (code) - return (code); + return code; } /* @@ -345,7 +356,6 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; - ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1); if (flags & ATTR_NOLOCK) need_iolock = 0; if (!(mask & XFS_AT_SIZE)) { @@ -540,24 +550,6 @@ xfs_setattr( goto error_return; } - /* - * Can't set extent size unless the file is marked, or - * about to be marked as a realtime file. - * - * This check will be removed when fixed size extents - * with buffered data writes is implemented. - * - */ - if ((mask & XFS_AT_EXTSIZE) && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - vap->va_extsize) && - (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || - ((mask & XFS_AT_XFLAGS) && - (vap->va_xflags & XFS_XFLAG_REALTIME))))) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - /* * Can't change realtime flag if any extents are allocated. */ @@ -631,6 +623,7 @@ xfs_setattr( code = xfs_igrow_start(ip, vap->va_size, credp); } xfs_iunlock(ip, XFS_ILOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -681,9 +674,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + VTRUNCATE(vp); } /* * Have to do this even if the file's size doesn't change. @@ -815,18 +816,24 @@ xfs_setattr( di_flags |= XFS_DIFLAG_NODUMP; if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if (vap->va_xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) di_flags |= XFS_DIFLAG_NOSYMLINKS; - } else { + if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { if (vap->va_xflags & XFS_XFLAG_REALTIME) { di_flags |= XFS_DIFLAG_REALTIME; ip->i_iocore.io_flags |= XFS_IOCORE_RT; } else { ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; } + if (vap->va_xflags & XFS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; } ip->i_d.di_flags = di_flags; } @@ -859,7 +866,7 @@ xfs_setattr( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesytems. + * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis @@ -880,7 +887,7 @@ xfs_setattr( */ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); if (mandlock_before != mandlock_after) { - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, mandlock_after); } @@ -946,6 +953,13 @@ xfs_access( } +/* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. + */ +#define SYMLINK_MAPS 2 + /* * xfs_readlink * @@ -961,7 +975,7 @@ xfs_readlink( int count; xfs_off_t offset; int pathlen; - vnode_t *vp; + bhv_vnode_t *vp; int error = 0; xfs_mount_t *mp; int nmaps; @@ -996,10 +1010,6 @@ xfs_readlink( goto error_return; } - if (!(ioflags & IO_INVIS)) { - xfs_ichgtime(ip, XFS_ICHGTIME_ACC); - } - /* * See if the symlink is stored inline. */ @@ -1015,7 +1025,7 @@ xfs_readlink( nmaps = SYMLINK_MAPS; error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), - 0, NULL, 0, mval, &nmaps, NULL); + 0, NULL, 0, mval, &nmaps, NULL, NULL); if (error) { goto error_return; @@ -1043,11 +1053,8 @@ xfs_readlink( } - error_return: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; } @@ -1188,7 +1195,7 @@ xfs_fsync( /* * If this inode is on the RT dev we need to flush that - * cache aswell. + * cache as well. */ if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); @@ -1222,21 +1229,22 @@ xfs_inactive_free_eofblocks( last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) - return (0); + return 0; nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && - (imap.br_startblock != HOLESTARTBLOCK)) { + (imap.br_startblock != HOLESTARTBLOCK || + ip->i_delayed_blks)) { /* * Attach the dquots to the inode up front. */ if ((error = XFS_QM_DQATTACH(mp, ip, 0))) - return (error); + return error; /* * There are blocks after the end of file. @@ -1264,7 +1272,7 @@ xfs_inactive_free_eofblocks( ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1292,7 +1300,7 @@ xfs_inactive_free_eofblocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); } - return (error); + return error; } /* @@ -1352,10 +1360,10 @@ xfs_inactive_symlink_rmt( */ done = 0; XFS_BMAP_INIT(&free_list, &first_block); - nmaps = sizeof(mval) / sizeof(mval[0]); + nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list))) + &free_list, NULL))) goto error0; /* * Invalidate the block(s). @@ -1370,7 +1378,7 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) + &first_block, &free_list, NULL, &done))) goto error1; ASSERT(done); /* @@ -1397,7 +1405,7 @@ xfs_inactive_symlink_rmt( */ ntp = xfs_trans_dup(tp); /* - * Commit the transaction containing extent freeing and EFD's. + * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. @@ -1470,7 +1478,7 @@ xfs_inactive_symlink_local( if (error) { xfs_trans_cancel(*tpp, 0); *tpp = NULL; - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); @@ -1483,12 +1491,9 @@ xfs_inactive_symlink_local( XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); } - return (0); + return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1509,7 +1514,7 @@ xfs_inactive_attrs( if (error) { *tpp = NULL; xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); /* goto out*/ + return error; /* goto out */ } tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); @@ -1522,7 +1527,7 @@ xfs_inactive_attrs( xfs_trans_cancel(tp, 0); *tpp = NULL; xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1533,7 +1538,7 @@ xfs_inactive_attrs( ASSERT(ip->i_d.di_anextents == 0); *tpp = tp; - return (0); + return 0; } STATIC int @@ -1541,16 +1546,16 @@ xfs_release( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; xfs_mount_t *mp; int error; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; - if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ if (vp->v_vfsp->vfs_flag & VFS_RDONLY) @@ -1562,17 +1567,17 @@ xfs_release( return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && + ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && - (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) { + (!(ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { if ((error = xfs_inactive_free_eofblocks(mp, ip))) - return (error); + return error; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } } @@ -1594,8 +1599,8 @@ xfs_inactive( cred_t *credp) { xfs_inode_t *ip; - vnode_t *vp; - xfs_bmap_free_t free_list; + bhv_vnode_t *vp; + xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; xfs_trans_t *tp; @@ -1625,7 +1630,8 @@ xfs_inactive( * only one with a reference to the inode. */ truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) && + ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || + (ip->i_delayed_blks > 0)) && ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); mp = ip->i_mount; @@ -1643,14 +1649,16 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && - (ip->i_df.if_flags & XFS_IFEXTENTS)) && - (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) || - (ip->i_delayed_blks != 0))) { + ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ip->i_delayed_blks > 0)) && + (ip->i_df.if_flags & XFS_IFEXTENTS) && + (!(ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || + (ip->i_delayed_blks != 0)))) { if ((error = xfs_inactive_free_eofblocks(mp, ip))) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } goto out; @@ -1659,7 +1667,7 @@ xfs_inactive( ASSERT(ip->i_d.di_nlink == 0); if ((error = XFS_QM_DQATTACH(mp, ip, 0))) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); if (truncate) { @@ -1682,7 +1690,7 @@ xfs_inactive( ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1703,7 +1711,7 @@ xfs_inactive( xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { @@ -1717,7 +1725,7 @@ xfs_inactive( if (error) { ASSERT(tp == NULL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); @@ -1730,7 +1738,7 @@ xfs_inactive( if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); @@ -1752,7 +1760,7 @@ xfs_inactive( * cancelled, and the inode is unlocked. Just get out. */ if (error) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } else if (ip->i_afp) { xfs_idestroy_fork(ip, XFS_ATTR_FORK); } @@ -1772,7 +1780,7 @@ xfs_inactive( cmn_err(CE_NOTE, "xfs_inactive: xfs_ifree() returned an error = %d on %s", error, mp->m_fsname); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } else { @@ -1807,17 +1815,17 @@ xfs_inactive( STATIC int xfs_lookup( bhv_desc_t *dir_bdp, - vname_t *dentry, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vnode_t **vpp, int flags, - vnode_t *rdir, + bhv_vnode_t *rdir, cred_t *credp) { xfs_inode_t *dp, *ip; xfs_ino_t e_inum; int error; uint lock_mode; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; dir_vp = BHV_TO_VNODE(dir_bdp); vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); @@ -1844,15 +1852,15 @@ xfs_lookup( STATIC int xfs_create( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *name = VNAME(dentry); - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; xfs_inode_t *dp, *ip; - vnode_t *vp=NULL; + bhv_vnode_t *vp = NULL; xfs_trans_t *tp; xfs_mount_t *mp; xfs_dev_t rdev; @@ -2035,10 +2043,10 @@ xfs_create( XFS_QM_DQRELE(mp, gdqp); /* - * Propogate the fact that the vnode changed after the + * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3); *vpp = vp; @@ -2059,8 +2067,8 @@ std_return: abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ - error_return: + error_return: if (tp != NULL) xfs_trans_cancel(tp, cancel_flags); @@ -2119,7 +2127,7 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - vname_t *dentry, + bhv_vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2333,10 +2341,10 @@ int remove_which_error_return = 0; STATIC int xfs_remove( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; char *name = VNAME(dentry); xfs_inode_t *dp, *ip; xfs_trans_t *tp = NULL; @@ -2382,7 +2390,7 @@ xfs_remove( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &ip); @@ -2523,7 +2531,7 @@ xfs_remove( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); + bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); IRELE(ip); @@ -2576,8 +2584,8 @@ xfs_remove( STATIC int xfs_link( bhv_desc_t *target_dir_bdp, - vnode_t *src_vp, - vname_t *dentry, + bhv_vnode_t *src_vp, + bhv_vname_t *dentry, cred_t *credp) { xfs_inode_t *tdp, *sip; @@ -2589,8 +2597,7 @@ xfs_link( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *target_dir_vp; - bhv_desc_t *src_bdp; + bhv_vnode_t *target_dir_vp; int resblks; char *target_name = VNAME(dentry); int target_namelen; @@ -2603,8 +2610,7 @@ xfs_link( if (VN_ISDIR(src_vp)) return XFS_ERROR(EPERM); - src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); - sip = XFS_BHVTOI(src_bdp); + sip = xfs_vtoi(src_vp); tdp = XFS_BHVTOI(target_dir_bdp); mp = tdp->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) @@ -2677,7 +2683,7 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (tdp->i_d.di_projid != sip->i_d.di_projid))) { - error = XFS_ERROR(EPERM); + error = XFS_ERROR(EXDEV); goto error_return; } @@ -2736,9 +2742,9 @@ std_return: abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ + error_return: xfs_trans_cancel(tp, cancel_flags); - goto std_return; } /* @@ -2748,15 +2754,15 @@ std_return: STATIC int xfs_mkdir( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *dir_name = VNAME(dentry); xfs_inode_t *dp; xfs_inode_t *cdp; /* inode of created dir */ - vnode_t *cvp; /* vnode of created dir */ + bhv_vnode_t *cvp; /* vnode of created dir */ xfs_trans_t *tp; xfs_mount_t *mp; int cancel_flags; @@ -2764,7 +2770,7 @@ xfs_mkdir( int committed; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; boolean_t dp_joined_to_trans; boolean_t created = B_FALSE; int dm_event_sent = 0; @@ -2983,7 +2989,7 @@ std_return: STATIC int xfs_rmdir( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { char *name = VNAME(dentry); @@ -2996,7 +3002,7 @@ xfs_rmdir( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; int dm_di_mode = 0; int last_cdp_link; int namelen; @@ -3034,7 +3040,7 @@ xfs_rmdir( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &cdp); @@ -3195,13 +3201,13 @@ xfs_rmdir( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); + bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); IRELE(cdp); /* Fall through to std_return with error = 0 or the errno * from xfs_trans_commit. */ -std_return: + std_return: if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dir_vp, DM_RIGHT_NULL, @@ -3214,6 +3220,8 @@ std_return: error1: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; + /* FALLTHROUGH */ + error_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; @@ -3237,7 +3245,6 @@ xfs_readdir( xfs_trans_t *tp = NULL; int error = 0; uint lock_mode; - xfs_off_t start_offset; vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, (inst_t *)__return_address); @@ -3248,11 +3255,7 @@ xfs_readdir( } lock_mode = xfs_ilock_map_shared(dp); - start_offset = uiop->uio_offset; error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); - if (start_offset != uiop->uio_offset) { - xfs_ichgtime(dp, XFS_ICHGTIME_ACC); - } xfs_iunlock_map_shared(dp, lock_mode); return error; } @@ -3265,10 +3268,10 @@ xfs_readdir( STATIC int xfs_symlink( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, + bhv_vname_t *dentry, + bhv_vattr_t *vap, char *target_path, - vnode_t **vpp, + bhv_vnode_t **vpp, cred_t *credp) { xfs_trans_t *tp; @@ -3280,7 +3283,7 @@ xfs_symlink( xfs_bmap_free_t free_list; xfs_fsblock_t first_block; boolean_t dp_joined_to_trans; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -3474,7 +3477,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list); + &free_list, NULL); if (error) { goto error1; } @@ -3558,7 +3561,7 @@ std_return: } if (!error) { - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ip); vp = XFS_ITOV(ip); @@ -3623,10 +3626,10 @@ xfs_fid2( int xfs_rwlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3635,9 +3638,9 @@ xfs_rwlock( if (locktype == VRWLOCK_WRITE) { xfs_ilock(ip, XFS_IOLOCK_EXCL); } else if (locktype == VRWLOCK_TRY_READ) { - return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)); + return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); } else if (locktype == VRWLOCK_TRY_WRITE) { - return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)); + return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); } else { ASSERT((locktype == VRWLOCK_READ) || (locktype == VRWLOCK_WRITE_DIRECT)); @@ -3654,10 +3657,10 @@ xfs_rwlock( void xfs_rwunlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3761,7 +3764,6 @@ xfs_inode_flush( return error; } - int xfs_set_dmattrs ( bhv_desc_t *bdp, @@ -3802,16 +3804,12 @@ xfs_set_dmattrs ( return error; } - -/* - * xfs_reclaim - */ STATIC int xfs_reclaim( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); @@ -3829,7 +3827,12 @@ xfs_reclaim( vn_iowait(vp); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - ASSERT(VN_CACHED(vp) == 0); + + /* + * Make sure the atime in the XFS inode is correct before freeing the + * Linux inode. + */ + xfs_synchronize_atime(ip); /* If we have nothing to flush with this inode then complete the * teardown now, otherwise break the link between the xfs inode @@ -3861,7 +3864,7 @@ xfs_finish_reclaim( int sync_mode) { xfs_ihash_t *ih = ip->i_hash; - vnode_t *vp = XFS_ITOV_NULL(ip); + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); int error; if (vp && VN_BAD(vp)) @@ -3880,7 +3883,7 @@ xfs_finish_reclaim( xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return(1); + return 1; } ip->i_flags |= XFS_IRECLAIM; write_unlock(&ih->ih_lock); @@ -3958,8 +3961,9 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) } } XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, noblock, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); + if (xfs_finish_reclaim(ip, noblock, + XFS_IFLUSH_DELWRI_ELSE_ASYNC)) + delay(1); purged = 1; break; } @@ -3998,42 +4002,36 @@ xfs_alloc_file_space( int alloc_type, int attr_flags) { + xfs_mount_t *mp = ip->i_mount; + xfs_off_t count; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; - int committed; - xfs_off_t count; - xfs_filblks_t datablocks; - int error; + xfs_extlen_t extsz, temp; + xfs_fileoff_t startoffset_fsb; xfs_fsblock_t firstfsb; - xfs_bmap_free_t free_list; - xfs_bmbt_irec_t *imapp; - xfs_bmbt_irec_t imaps[1]; - xfs_mount_t *mp; - int numrtextents; - int reccount; - uint resblks; + int nimaps; + int bmapi_flag; + int quota_flag; int rt; - int rtextsize; - xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; - int xfs_bmapi_flags; + xfs_bmbt_irec_t imaps[1], *imapp; + xfs_bmap_free_t free_list; + uint qblocks, resblks, resrtextents; + int committed; + int error; vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); - mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - /* - * determine if this is a realtime file - */ - if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) { - if (ip->i_d.di_extsize) - rtextsize = ip->i_d.di_extsize; - else - rtextsize = mp->m_sb.sb_rextsize; - } else - rtextsize = 0; + rt = XFS_IS_REALTIME_INODE(ip); + if (unlikely(rt)) { + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + } else { + extsz = ip->i_d.di_extsize; + } if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4044,8 +4042,8 @@ xfs_alloc_file_space( count = len; error = 0; imapp = &imaps[0]; - reccount = 1; - xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); + nimaps = 1; + bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); @@ -4062,47 +4060,55 @@ xfs_alloc_file_space( offset, end_dmi_offset - offset, 0, NULL); if (error) - return(error); + return error; } /* - * allocate file space until done or until there is an error + * Allocate file space until done or until there is an error */ retry: while (allocatesize_fsb && !error) { + xfs_fileoff_t s, e; + /* - * determine if reserving space on - * the data or realtime partition. + * Determine space reservations for data/realtime. */ - if (rt) { - xfs_fileoff_t s, e; - + if (unlikely(extsz)) { s = startoffset_fsb; - do_div(s, rtextsize); - s *= rtextsize; - e = roundup_64(startoffset_fsb + allocatesize_fsb, - rtextsize); - numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize; - datablocks = 0; + do_div(s, extsz); + s *= extsz; + e = startoffset_fsb + allocatesize_fsb; + if ((temp = do_mod(startoffset_fsb, extsz))) + e += temp; + if ((temp = do_mod(e, extsz))) + e += extsz - temp; } else { - datablocks = allocatesize_fsb; - numrtextents = 0; + s = 0; + e = allocatesize_fsb; + } + + if (unlikely(rt)) { + resrtextents = qblocks = (uint)(e - s); + resrtextents /= mp->m_sb.sb_rextsize; + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + quota_flag = XFS_QMOPT_RES_RTBLKS; + } else { + resrtextents = 0; + resblks = qblocks = \ + XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); + quota_flag = XFS_QMOPT_RES_REGBLKS; } /* - * allocate and setup the transaction + * Allocate and setup the transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); - error = xfs_trans_reserve(tp, - resblks, - XFS_WRITE_LOG_RES(mp), - numrtextents, + error = xfs_trans_reserve(tp, resblks, + XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); - /* - * check for running out of space + * Check for running out of space */ if (error) { /* @@ -4113,8 +4119,8 @@ retry: break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, 0); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, + qblocks, 0, quota_flag); if (error) goto error1; @@ -4122,19 +4128,19 @@ retry: xfs_trans_ihold(tp, ip); /* - * issue the bmapi() call to allocate the blocks + * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, - allocatesize_fsb, xfs_bmapi_flags, - &firstfsb, 0, imapp, &reccount, - &free_list); + error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + allocatesize_fsb, bmapi_flag, + &firstfsb, 0, imapp, &nimaps, + &free_list, NULL); if (error) { goto error0; } /* - * complete the transaction + * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); if (error) { @@ -4149,7 +4155,7 @@ retry: allocated_fsb = imapp->br_blockcount; - if (reccount == 0) { + if (nimaps == 0) { error = XFS_ERROR(ENOSPC); break; } @@ -4172,9 +4178,11 @@ dmapi_enospc_check: return error; - error0: +error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - error1: + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + +error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto dmapi_enospc_check; @@ -4206,8 +4214,8 @@ xfs_zero_remaining_bytes( for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, - &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -4266,7 +4274,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - vnode_t *vp; + bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4312,14 +4320,15 @@ xfs_free_file_space( offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); if (error) - return(error); + return error; } - ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); if (attr_flags & ATTR_NOLOCK) need_iolock = 0; - if (need_iolock) + if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ + } rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), (__uint8_t)NBPP); @@ -4331,7 +4340,7 @@ xfs_free_file_space( if (VN_CACHED(vp) != 0) { xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, ctooff(offtoct(ioffset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), + bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), -1, FI_REMAPF_LOCKED); } @@ -4343,8 +4352,8 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4358,8 +4367,8 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4419,8 +4428,8 @@ xfs_free_file_space( } xfs_ilock(ip, XFS_ILOCK_EXCL); error = XFS_TRANS_RESERVE_QUOTA(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + ip->i_udquot, ip->i_gdquot, resblks, 0, + XFS_QMOPT_RES_REGBLKS); if (error) goto error1; @@ -4431,9 +4440,9 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, + error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, &done); + 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { goto error0; } @@ -4493,8 +4502,8 @@ xfs_change_file_space( xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - vattr_t va; - vnode_t *vp; + bhv_vattr_t va; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -4647,12 +4656,17 @@ xfs_change_file_space( return error; } -vnodeops_t xfs_vnodeops = { +bhv_vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, + .vop_close = xfs_close, .vop_read = xfs_read, #ifdef HAVE_SENDFILE .vop_sendfile = xfs_sendfile, +#endif +#ifdef HAVE_SPLICE + .vop_splice_read = xfs_splice_read, + .vop_splice_write = xfs_splice_write, #endif .vop_write = xfs_write, .vop_ioctl = xfs_ioctl,