inode->i_flags |= S_DIRSYNC;
}
-struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
- u64 blkno,
- int delete_vote)
-{
- struct ocfs2_find_inode_args args;
-
- /* ocfs2_ilookup_for_vote should *only* be called from the
- * vote thread */
- BUG_ON(current != osb->vote_task);
-
- args.fi_blkno = blkno;
- args.fi_flags = OCFS2_FI_FLAG_NOWAIT;
- if (delete_vote)
- args.fi_flags |= OCFS2_FI_FLAG_DELETE;
- args.fi_ino = ino_from_blkno(osb->sb, blkno);
- return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
-}
-
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
{
struct inode *inode = NULL;
if (is_bad_inode(inode)) {
iput(inode);
inode = ERR_PTR(-ESTALE);
- mlog_errno(PTR_ERR(inode));
goto bail;
}
mlog(0, "returning inode with number %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog_exit_ptr(inode);
- } else
- mlog_errno(PTR_ERR(inode));
+ }
return inode;
}
if (oi->ip_blkno != args->fi_blkno)
goto bail;
- /* OCFS2_FI_FLAG_NOWAIT is *only* set from
- * ocfs2_ilookup_for_vote which won't create an inode for one
- * that isn't found. The vote thread which doesn't want to get
- * an inode which is in the process of going away - otherwise
- * the call to __wait_on_freeing_inode in find_inode_fast will
- * cause it to deadlock on an inode which may be waiting on a
- * vote (or lock release) in delete_inode */
- if ((args->fi_flags & OCFS2_FI_FLAG_NOWAIT) &&
- (inode->i_state & (I_FREEING|I_CLEAR))) {
- /* As stated above, we're not going to return an
- * inode. In the case of a delete vote, the voting
- * code is going to signal the other node to go
- * ahead. Mark that state here, so this freeing inode
- * has the state when it gets to delete_inode. */
- if (args->fi_flags & OCFS2_FI_FLAG_DELETE) {
- spin_lock(&oi->ip_lock);
- ocfs2_mark_inode_remotely_deleted(inode);
- spin_unlock(&oi->ip_lock);
- }
- goto bail;
- }
-
ret = 1;
bail:
mlog_exit(ret);
* today. change if needed. */
if (!OCFS2_IS_VALID_DINODE(fe) ||
!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
- mlog(ML_ERROR, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
+ mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
"signature = %.*s, flags = 0x%x\n",
inode->i_ino,
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
(unsigned long long)fe->i_blkno);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
- OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
inode->i_nlink = le16_to_cpu(fe->i_links_count);
ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
OCFS2_LOCK_TYPE_META, 0, inode);
+
+ ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
+ OCFS2_LOCK_TYPE_OPEN, 0, inode);
}
ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
inode);
ocfs2_set_inode_flags(inode);
- inode->i_flags |= S_NOATIME;
status = 0;
bail:
* cluster lock before trusting anything anyway.
*/
can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
- && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
+ && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
+ && !ocfs2_mount_local(osb);
/*
* To maintain backwards compatibility with older versions of
OCFS2_LOCK_TYPE_META,
generation, inode);
+ ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
+ OCFS2_LOCK_TYPE_OPEN,
+ 0, inode);
+
if (can_lock) {
- status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ status = ocfs2_open_lock(inode);
+ if (status) {
+ make_bad_inode(inode);
+ mlog_errno(status);
+ return status;
+ }
+ status = ocfs2_meta_lock(inode, NULL, 0);
if (status) {
make_bad_inode(inode);
mlog_errno(status);
}
}
+ if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
+ status = ocfs2_try_open_lock(inode, 0);
+ if (status) {
+ make_bad_inode(inode);
+ return status;
+ }
+ }
+
status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
can_lock ? inode : NULL);
if (status < 0) {
S_ISBLK(le16_to_cpu(fe->i_mode)))
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- if (ocfs2_populate_inode(inode, fe, 0) < 0) {
- mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
- (unsigned long long)fe->i_blkno, inode->i_ino);
+ if (ocfs2_populate_inode(inode, fe, 0) < 0)
goto bail;
- }
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
struct buffer_head *fe_bh)
{
int status = 0;
- struct ocfs2_journal_handle *handle = NULL;
struct ocfs2_truncate_context *tc = NULL;
struct ocfs2_dinode *fe;
fe = (struct ocfs2_dinode *) fe_bh->b_data;
- /* zero allocation, zero truncate :) */
- if (!fe->i_clusters)
- goto bail;
-
- handle = ocfs2_start_trans(osb, handle, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(status);
- goto bail;
- }
-
- status = ocfs2_set_inode_size(handle, inode, fe_bh, 0ULL);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- ocfs2_commit_trans(handle);
- handle = NULL;
-
- status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ if (fe->i_clusters) {
+ status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
- status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
+ status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
}
-bail:
- if (handle)
- ocfs2_commit_trans(handle);
+out:
mlog_exit(status);
return status;
int status;
struct inode *inode_alloc_inode = NULL;
struct buffer_head *inode_alloc_bh = NULL;
- struct ocfs2_journal_handle *handle;
+ handle_t *handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
}
mutex_lock(&inode_alloc_inode->i_mutex);
- status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
+ status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1);
if (status < 0) {
mutex_unlock(&inode_alloc_inode->i_mutex);
goto bail;
}
- handle = ocfs2_start_trans(osb, NULL, OCFS2_DELETE_INODE_CREDITS);
+ handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
mlog_errno(status);
bail_commit:
- ocfs2_commit_trans(handle);
+ ocfs2_commit_trans(osb, handle);
bail_unlock:
ocfs2_meta_unlock(inode_alloc_inode, 1);
mutex_unlock(&inode_alloc_inode->i_mutex);
struct inode *orphan_dir_inode = NULL;
struct buffer_head *orphan_dir_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_dinode *di;
- /* We've already voted on this so it should be readonly - no
- * spinlock needed. */
- orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+ orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
if (status)
* delete_inode operation. We do this now to avoid races with
* recovery completion on other nodes. */
mutex_lock(&orphan_dir_inode->i_mutex);
- status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
+ status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
mutex_unlock(&orphan_dir_inode->i_mutex);
goto bail;
}
- status = ocfs2_request_delete_vote(inode);
- /* -EBUSY means that other nodes are still using the
- * inode. We're done here though, so avoid doing anything on
- * disk and let them worry about deleting it. */
- if (status == -EBUSY) {
+ /*
+ * This is how ocfs2 determines whether an inode is still live
+ * within the cluster. Every node takes a shared read lock on
+ * the inode open lock in ocfs2_read_locked_inode(). When we
+ * get to ->delete_inode(), each node tries to convert its
+ * lock to an exclusive. Trylocks are serialized by the inode
+ * meta data lock. If the upconvert succeeds, we know the inode
+ * is no longer live and can be deleted.
+ *
+ * Though we call this with the meta data lock held, the
+ * trylock keeps us from ABBA deadlock.
+ */
+ status = ocfs2_try_open_lock(inode, 1);
+ if (status == -EAGAIN) {
status = 0;
mlog(0, "Skipping delete of %llu because it is in use on"
"other nodes\n", (unsigned long long)oi->ip_blkno);
goto bail;
}
- spin_lock(&oi->ip_lock);
- if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) {
- /* Nobody knew which slot this inode was orphaned
- * into. This may happen during node death and
- * recovery knows how to clean it up so we can safely
- * ignore this inode for now on. */
- mlog(0, "Nobody knew where inode %llu was orphaned!\n",
- (unsigned long long)oi->ip_blkno);
- } else {
- *wipe = 1;
-
- mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
- (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
- }
- spin_unlock(&oi->ip_lock);
+ *wipe = 1;
+ mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n",
+ (unsigned long long)oi->ip_blkno,
+ le16_to_cpu(di->i_orphaned_slot));
bail:
return status;
* allocation lock here as it won't be needed - nobody will
* have the file open.
*/
- status = ocfs2_meta_lock(inode, NULL, &di_bh, 1);
+ status = ocfs2_meta_lock(inode, &di_bh, 1);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
"Inode=%lu\n", inode->i_ino);
+ /* With the delete_inode vote removed, we took the open lock earlier;
+ * now it is time to unlock the PR and EX open locks. */
+ ocfs2_open_unlock(inode);
+
/* Do these before all the other work so that we don't bounce
* the vote thread while waiting to destroy the locks. */
ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
+ ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
/* We very well may get a clear_inode before all an inodes
* metadata has hit disk. Of course, we can't drop any cluster
"Clear inode of %llu, inode has io markers\n",
(unsigned long long)oi->ip_blkno);
- ocfs2_extent_map_drop(inode, 0);
- ocfs2_extent_map_init(inode);
-
status = ocfs2_drop_inode_locks(inode);
if (status < 0)
mlog_errno(status);
ocfs2_lock_res_free(&oi->ip_rw_lockres);
ocfs2_lock_res_free(&oi->ip_meta_lockres);
ocfs2_lock_res_free(&oi->ip_data_lockres);
+ ocfs2_lock_res_free(&oi->ip_open_lockres);
ocfs2_metadata_cache_purge(inode);
mlog_bug_on_msg(oi->ip_open_count,
"Clear inode of %llu has open count %d\n",
(unsigned long long)oi->ip_blkno, oi->ip_open_count);
- mlog_bug_on_msg(!list_empty(&oi->ip_handle_list),
- "Clear inode of %llu has non empty handle list\n",
- (unsigned long long)oi->ip_blkno);
- mlog_bug_on_msg(oi->ip_handle,
- "Clear inode of %llu has non empty handle pointer\n",
- (unsigned long long)oi->ip_blkno);
/* Clear all other flags. */
oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
(unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
- /* Testing ip_orphaned_slot here wouldn't work because we may
- * not have gotten a delete_inode vote from any other nodes
- * yet. */
if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
generic_delete_inode(inode);
else
return NULL;
}
- tmperr = ocfs2_extent_map_get_blocks(inode, block, 1,
- &p_blkno, NULL);
+ tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL);
if (tmperr < 0) {
mlog_errno(tmperr);
goto fail;
/* Let ocfs2_meta_lock do the work of updating our struct
* inode for us. */
- status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ status = ocfs2_meta_lock(inode, NULL, 0);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
* struct inode.
* Only takes ip_lock.
*/
-int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
+int ocfs2_mark_inode_dirty(handle_t *handle,
struct inode *inode,
struct buffer_head *bh)
{