aboutsummaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorLinus Torvalds2015-02-17 08:38:30 -0800
committerLinus Torvalds2015-02-17 08:38:30 -0800
commitc397f8fa4379040bada53256c848e62c8b060392 (patch)
tree8101efb5c0c3b0a73e5e65f3474843c0914cc4d0 /fs/ocfs2
parent796e1c55717e9a6ff5c81b12289ffa1ffd919b6f (diff)
parentaaaf5fbf56f16c81a653713cc333b18ad6e25ea9 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge fifth set of updates from Andrew Morton: - A few things which were awaiting merges from linux-next: - rtc - ocfs2 - misc others - Willy's "dax" feature: direct fs access to memory (mainly NV-DIMMs) which isn't backed by pageframes. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (37 commits) rtc: add driver for DS1685 family of real time clocks MAINTAINERS: add entry for Maxim PMICs on Samsung boards lib/Kconfig: use bool instead of boolean powerpc: drop _PAGE_FILE and pte_file()-related helpers ocfs2: set append dio as a ro compat feature ocfs2: wait for orphan recovery first once append O_DIRECT write crash ocfs2: complete the rest request through buffer io ocfs2: do not fallback to buffer I/O write if appending ocfs2: allocate blocks in ocfs2_direct_IO_get_blocks ocfs2: implement ocfs2_direct_IO_write ocfs2: add orphan recovery types in ocfs2_recover_orphans ocfs2: add functions to add and remove inode in orphan dir ocfs2: prepare some interfaces used in append direct io MAINTAINERS: fix spelling mistake & remove trailing WS dax: does not work correctly with virtual aliasing caches brd: rename XIP to DAX ext4: add DAX functionality dax: add dax_zero_page_range ext2: get rid of most mentions of XIP in ext2 ext2: remove ext2_aops_xip ...
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/aops.c242
-rw-r--r--fs/ocfs2/file.c76
-rw-r--r--fs/ocfs2/file.h9
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/journal.c110
-rw-r--r--fs/ocfs2/journal.h5
-rw-r--r--fs/ocfs2/namei.c284
-rw-r--r--fs/ocfs2/namei.h8
-rw-r--r--fs/ocfs2/ocfs2.h23
-rw-r--r--fs/ocfs2/ocfs2_fs.h14
-rw-r--r--fs/ocfs2/super.c2
12 files changed, 715 insertions, 62 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 46d93e941f3d..44db1808cdb5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -28,6 +28,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
+#include <linux/blkdev.h>
#include <cluster/masklog.h>
@@ -47,6 +48,9 @@
#include "ocfs2_trace.h"
#include "buffer_head_io.h"
+#include "dir.h"
+#include "namei.h"
+#include "sysfile.h"
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -506,18 +510,21 @@ bail:
*
* called like this: dio->get_blocks(dio->inode, fs_startblk,
* fs_count, map_bh, dio->rw == WRITE);
- *
- * Note that we never bother to allocate blocks here, and thus ignore the
- * create argument.
*/
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
int ret;
+ u32 cpos = 0;
+ int alloc_locked = 0;
u64 p_blkno, inode_blocks, contig_blocks;
unsigned int ext_flags;
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ unsigned long len = bh_result->b_size;
+ unsigned int clusters_to_alloc = 0;
+
+ cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
/* This function won't even be called if the request isn't all
* nicely aligned and of the right size, so there's no need
@@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
/* We should already CoW the refcounted extent in case of create. */
BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
+ /* allocate blocks if no p_blkno is found, and create == 1 */
+ if (!p_blkno && create) {
+ ret = ocfs2_inode_lock(inode, NULL, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ alloc_locked = 1;
+
+ /* fill hole, allocate blocks can't be larger than the size
+ * of the hole */
+ clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
+ if (clusters_to_alloc > contig_blocks)
+ clusters_to_alloc = contig_blocks;
+
+ /* allocate extent and insert them into the extent tree */
+ ret = ocfs2_extend_allocation(inode, cpos,
+ clusters_to_alloc, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
+ ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
+ &contig_blocks, &ext_flags);
+ if (ret < 0) {
+ mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
+ (unsigned long long)iblock);
+ ret = -EIO;
+ goto bail;
+ }
+ }
+
/*
* get_more_blocks() expects us to describe a hole by clearing
* the mapped bit on bh_result().
@@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
contig_blocks = max_blocks;
bh_result->b_size = contig_blocks << blocksize_bits;
bail:
+ if (alloc_locked)
+ ocfs2_inode_unlock(inode, 1);
return ret;
}
@@ -597,6 +640,184 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static int ocfs2_is_overwrite(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset)
+{
+ int ret = 0;
+ u32 v_cpos = 0;
+ u32 p_cpos = 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
+ return 1;
+
+ return 0;
+}
+
+static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
+ struct iov_iter *iter,
+ loff_t offset)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+ bool orphaned = false;
+ int is_overwrite = 0;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ size_t count = iter->count;
+ journal_t *journal = osb->journal->j_journal;
+ u32 zero_len;
+ int cluster_align;
+ loff_t final_size = offset + count;
+ int append_write = offset >= i_size_read(inode) ? 1 : 0;
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+
+ {
+ u64 o = offset;
+
+ zero_len = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align = !zero_len;
+ }
+
+ /*
+ * when final_size > inode->i_size, inode->i_size will be
+ * updated after direct write, so add the inode to orphan
+ * dir first.
+ */
+ if (final_size > i_size_read(inode)) {
+ ret = ocfs2_add_inode_to_orphan(osb, inode);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ orphaned = true;
+ }
+
+ if (append_write) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, offset);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh, offset,
+ offset);
+ if (ret < 0) {
+ mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
+ if (is_overwrite < 0) {
+ mlog_errno(is_overwrite);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ di_bh = NULL;
+ }
+
+ written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
+ iter, offset,
+ ocfs2_direct_IO_get_blocks,
+ ocfs2_dio_end_io, NULL, 0);
+ if (unlikely(written < 0)) {
+ loff_t i_size = i_size_read(inode);
+
+ if (offset + count > i_size) {
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ if (i_size == i_size_read(inode)) {
+ ret = ocfs2_truncate_file(inode, di_bh,
+ i_size);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ goto clean_orphan;
+ }
+ }
+
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+ ret = jbd2_journal_force_commit(journal);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+ } else if (written < 0 && append_write && !is_overwrite &&
+ !cluster_align) {
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
+ BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ p_cpos << (osb->s_clustersize_bits - 9),
+ zero_len >> 9, GFP_KERNEL, false);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+clean_orphan:
+ if (orphaned) {
+ int tmp_ret;
+ int update_isize = written > 0 ? 1 : 0;
+ loff_t end = update_isize ? offset + written : 0;
+
+ tmp_ret = ocfs2_del_inode_from_orphan(osb, inode,
+ update_isize, end);
+ if (tmp_ret < 0) {
+ ret = tmp_ret;
+ goto out;
+ }
+
+ tmp_ret = jbd2_journal_force_commit(journal);
+ if (tmp_ret < 0) {
+ ret = tmp_ret;
+ mlog_errno(tmp_ret);
+ }
+ }
+
+out:
+ if (ret >= 0)
+ ret = written;
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO(int rw,
struct kiocb *iocb,
struct iov_iter *iter,
@@ -604,6 +825,9 @@ static ssize_t ocfs2_direct_IO(int rw,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int full_coherency = !(osb->s_mount_opt &
+ OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* Fallback to buffered I/O if we see an inode without
@@ -612,14 +836,20 @@ static ssize_t ocfs2_direct_IO(int rw,
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
- /* Fallback to buffered I/O if we are appending. */
- if (i_size_read(inode) <= offset)
+ /* Fallback to buffered I/O if we are appending and
+ * concurrent O_DIRECT writes are allowed.
+ */
+ if (i_size_read(inode) <= offset && !full_coherency)
return 0;
- return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+ if (rw == READ)
+ return __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev,
iter, offset,
ocfs2_direct_IO_get_blocks,
ocfs2_dio_end_io, NULL, 0);
+ else
+ return ocfs2_direct_IO_write(iocb, iter, offset);
}
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e0f04d55fd05..46e0d4e857c7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -295,7 +295,7 @@ out:
return ret;
}
-static int ocfs2_set_inode_size(handle_t *handle,
+int ocfs2_set_inode_size(handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
u64 new_i_size)
@@ -441,7 +441,7 @@ out:
return status;
}
-static int ocfs2_truncate_file(struct inode *inode,
+int ocfs2_truncate_file(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size)
{
@@ -709,6 +709,13 @@ leave:
return status;
}
+int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten)
+{
+ return __ocfs2_extend_allocation(inode, logical_start,
+ clusters_to_add, mark_unwritten);
+}
+
/*
* While a write will already be ordering the data, a truncate will not.
* Thus, we need to explicitly order the zeroed pages.
@@ -2109,6 +2116,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
loff_t saved_pos = 0, end;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int full_coherency = !(osb->s_mount_opt &
+ OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* We start with a read level meta lock and only jump to an ex
@@ -2197,7 +2207,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* one node could wind up truncating another
* nodes writes.
*/
- if (end > i_size_read(inode)) {
+ if (end > i_size_read(inode) && !full_coherency) {
+ *direct_io = 0;
+ break;
+ }
+
+ /*
+ * Fallback to old way if the feature bit is not set.
+ */
+ if (end > i_size_read(inode) &&
+ !ocfs2_supports_append_dio(osb)) {
*direct_io = 0;
break;
}
@@ -2210,7 +2229,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
*/
ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
if (ret == 1) {
- *direct_io = 0;
+ /*
+ * Fallback to old way if the feature bit is not set.
+ * Otherwise try dio first and then complete the rest
+ * request through buffer io.
+ */
+ if (!ocfs2_supports_append_dio(osb))
+ *direct_io = 0;
ret = 0;
} else if (ret < 0)
mlog_errno(ret);
@@ -2243,6 +2268,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
u32 old_clusters;
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2357,11 +2383,51 @@ relock:
iov_iter_truncate(from, count);
if (direct_io) {
+ loff_t endbyte;
+ ssize_t written_buffered;
written = generic_file_direct_write(iocb, from, *ppos);
- if (written < 0) {
+ if (written < 0 || written == count) {
ret = written;
goto out_dio;
}
+
+ /*
+ * for completing the rest of the request.
+ */
+ *ppos += written;
+ count -= written;
+ written_buffered = generic_perform_write(file, from, *ppos);
+ /*
+ * If generic_file_buffered_write() returned a synchronous error
+ * then we want to return the number of bytes which were
+ * direct-written, or the error code if that was zero. Note
+ * that this differs from normal direct-io semantics, which
+ * will return -EFOO even if some bytes were written.
+ */
+ if (written_buffered < 0) {
+ ret = written_buffered;
+ goto out_dio;
+ }
+
+ iocb->ki_pos = *ppos + written_buffered;
+ /* We need to ensure that the page cache pages are written to
+ * disk and invalidated to preserve the expected O_DIRECT
+ * semantics.
+ */
+ endbyte = *ppos + written_buffered - 1;
+ ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
+ endbyte);
+ if (ret == 0) {
+ written += written_buffered;
+ invalidate_mapping_pages(mapping,
+ *ppos >> PAGE_CACHE_SHIFT,
+ endbyte >> PAGE_CACHE_SHIFT);
+ } else {
+ /*
+ * We don't know how much we wrote, so just return
+ * the number of bytes which were direct-written
+ */
+ }
} else {
current->backing_dev_info = inode_to_bdi(inode);
written = generic_perform_write(file, from, *ppos);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..e8c62f22215c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
+int ocfs2_set_inode_size(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ u64 new_i_size);
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
+int ocfs2_truncate_file(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
+int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c8b25de9efbb..3025c0da6b8a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -648,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode,
if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
- orphan_dir_bh);
+ orphan_dir_bh, false);
if (status < 0) {
mlog_errno(status);
goto bail_commit;
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ca3431ee7f24..5e86b247c821 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -81,6 +81,8 @@ struct ocfs2_inode_info
tid_t i_sync_tid;
tid_t i_datasync_tid;
+ wait_queue_head_t append_dio_wq;
+
struct dquot *i_dquot[MAXQUOTAS];
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index d10860fde165..ff531928269e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -50,6 +50,8 @@
#include "sysfile.h"
#include "uptodate.h"
#include "quota.h"
+#include "file.h"
+#include "namei.h"
#include "buffer_head_io.h"
#include "ocfs2_trace.h"
@@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
- int slot);
+ int slot,
+ enum ocfs2_orphan_reco_type orphan_reco_type);
static int ocfs2_commit_thread(void *arg);
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
- struct ocfs2_quota_recovery *qrec);
+ struct ocfs2_quota_recovery *qrec,
+ enum ocfs2_orphan_reco_type orphan_reco_type);
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
@@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
return 0;
}
-void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
+void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
int i;
@@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
for (i = 0; i < replay_map->rm_slots; i++)
if (replay_map->rm_replay_slots[i])
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
- NULL, NULL);
+ NULL, NULL,
+ orphan_reco_type);
replay_map->rm_state = REPLAY_DONE;
}
@@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item {
struct ocfs2_dinode *lri_la_dinode;
struct ocfs2_dinode *lri_tl_dinode;
struct ocfs2_quota_recovery *lri_qrec;
+ enum ocfs2_orphan_reco_type lri_orphan_reco_type;
};
/* Does the second half of the recovery process. By this point, the
@@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item, *n;
struct ocfs2_quota_recovery *qrec;
+ enum ocfs2_orphan_reco_type orphan_reco_type;
LIST_HEAD(tmp_la_list);
trace_ocfs2_complete_recovery(
@@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
la_dinode = item->lri_la_dinode;
tl_dinode = item->lri_tl_dinode;
qrec = item->lri_qrec;
+ orphan_reco_type = item->lri_orphan_reco_type;
trace_ocfs2_complete_recovery_slot(item->lri_slot,
la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
@@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
kfree(tl_dinode);
}
- ret = ocfs2_recover_orphans(osb, item->lri_slot);
+ ret = ocfs2_recover_orphans(osb, item->lri_slot,
+ orphan_reco_type);
if (ret < 0)
mlog_errno(ret);
@@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
struct ocfs2_dinode *tl_dinode,
- struct ocfs2_quota_recovery *qrec)
+ struct ocfs2_quota_recovery *qrec,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
struct ocfs2_la_recovery_item *item;
@@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
item->lri_slot = slot_num;
item->lri_tl_dinode = tl_dinode;
item->lri_qrec = qrec;
+ item->lri_orphan_reco_type = orphan_reco_type;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
/* No need to queue up our truncate_log as regular cleanup will catch
* that */
ocfs2_queue_recovery_completion(journal, osb->slot_num,
- osb->local_alloc_copy, NULL, NULL);
+ osb->local_alloc_copy, NULL, NULL,
+ ORPHAN_NEED_TRUNCATE);
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL;
@@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
- ocfs2_queue_replay_slots(osb);
+ ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
ocfs2_free_replay_slots(osb);
}
@@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
osb->slot_num,
NULL,
NULL,
- osb->quota_rec);
+ osb->quota_rec,
+ ORPHAN_NEED_TRUNCATE);
osb->quota_rec = NULL;
}
}
@@ -1360,7 +1374,7 @@ restart:
/* queue recovery for our own slot */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
- NULL, NULL);
+ NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
spin_lock(&osb->osb_lock);
while (rm->rm_used) {
@@ -1419,13 +1433,14 @@ skip_recovery:
continue;
}
ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
- NULL, NULL, qrec);
+ NULL, NULL, qrec,
+ ORPHAN_NEED_TRUNCATE);
}
ocfs2_super_unlock(osb, 1);
/* queue recovery for offline slots */
- ocfs2_queue_replay_slots(osb);
+ ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
bail:
mutex_lock(&osb->recovery_lock);
@@ -1711,7 +1726,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
/* This will kfree the memory pointed to by la_copy and tl_copy */
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
- tl_copy, NULL);
+ tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
status = 0;
done:
@@ -1901,7 +1916,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
for (i = 0; i < osb->max_slots; i++)
ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
- NULL);
+ NULL, ORPHAN_NO_NEED_TRUNCATE);
/*
* We queued a recovery on orphan slots, increment the sequence
* number and update LVB so other node will skip the scan for a while
@@ -2000,6 +2015,13 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
if (IS_ERR(iter))
return 0;
+ /* Skip inodes which are already added to recover list, since dio may
+ * happen concurrently with unlink/rename */
+ if (OCFS2_I(iter)->ip_next_orphan) {
+ iput(iter);
+ return 0;
+ }
+
trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
/* No locking is required for the next_orphan queue as there
* is only ever a single process doing orphan recovery. */
@@ -2108,7 +2130,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
* advertising our state to ocfs2_delete_inode().
*/
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
- int slot)
+ int slot,
+ enum ocfs2_orphan_reco_type orphan_reco_type)
{
int ret = 0;
struct inode *inode = NULL;
@@ -2132,13 +2155,60 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
(unsigned long long)oi->ip_blkno);
iter = oi->ip_next_orphan;
+ oi->ip_next_orphan = NULL;
+
+ /*
+ * We need to take and drop the inode lock to
+ * force read inode from disk.
+ */
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret) {
+ mlog_errno(ret);
+ goto next;
+ }
+ ocfs2_inode_unlock(inode, 0);
+
+ if (inode->i_nlink == 0) {
+ spin_lock(&oi->ip_lock);
+ /* Set the proper information to get us going into
+ * ocfs2_delete_inode. */
+ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ spin_unlock(&oi->ip_lock);
+ } else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) {
+ struct buffer_head *di_bh = NULL;
+
+ ret = ocfs2_rw_lock(inode, 1);
+ if (ret) {
+ mlog_errno(ret);
+ goto next;
+ }
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ ocfs2_rw_unlock(inode, 1);
+ mlog_errno(ret);
+ goto next;
+ }
+
+ ret = ocfs2_truncate_file(inode, di_bh,
+ i_size_read(inode));
+ ocfs2_inode_unlock(inode, 1);
+ ocfs2_rw_unlock(inode, 1);
+ brelse(di_bh);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto next;
+ }
+
+ ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0);
+ if (ret)
+ mlog_errno(ret);
- spin_lock(&oi->ip_lock);
- /* Set the proper information to get us going into
- * ocfs2_delete_inode. */
- oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
- spin_unlock(&oi->ip_lock);
+ wake_up(&OCFS2_I(inode)->append_dio_wq);
+ } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
+next:
iput(inode);
inode = iter;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7f8cde94abfe..f4cd3c3e9fb7 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
* orphan dir index leaf */
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
+/* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry +
+ * orphan dir index root + orphan dir index leaf */
+#define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4)
+#define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS
+
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink + 3 x dir index leaves */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 914c121ec890..b5c3a5ea3ee6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -79,7 +79,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir,
u64 blkno,
char *name,
- struct ocfs2_dir_lookup_result *lookup);
+ struct ocfs2_dir_lookup_result *lookup,
+ bool dio);
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
@@ -87,7 +88,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
char *name,
struct ocfs2_dir_lookup_result *lookup,
- struct inode *orphan_dir_inode);
+ struct inode *orphan_dir_inode,
+ bool dio);
static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
handle_t *handle,
@@ -104,6 +106,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
/* An orphan dir name is an 8 byte value, printed as a hex string */
#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
+#define OCFS2_DIO_ORPHAN_PREFIX "dio-"
+#define OCFS2_DIO_ORPHAN_PREFIX_LEN 4
static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
@@ -952,7 +956,8 @@ static int ocfs2_unlink(struct inode *dir,
if (ocfs2_inode_is_unlinkable(inode)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
OCFS2_I(inode)->ip_blkno,
- orphan_name, &orphan_insert);
+ orphan_name, &orphan_insert,
+ false);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -1004,7 +1009,7 @@ static int ocfs2_unlink(struct inode *dir,
if (is_unlinkable) {
status = ocfs2_orphan_add(osb, handle, inode, fe_bh,
- orphan_name, &orphan_insert, orphan_dir);
+ orphan_name, &orphan_insert, orphan_dir, false);
if (status < 0)
mlog_errno(status);
}
@@ -1440,7 +1445,8 @@ static int ocfs2_rename(struct inode *old_dir,
if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
OCFS2_I(new_inode)->ip_blkno,
- orphan_name, &orphan_insert);
+ orphan_name, &orphan_insert,
+ false);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1507,7 +1513,7 @@ static int ocfs2_rename(struct inode *old_dir,
if (should_add_orphan) {
status = ocfs2_orphan_add(osb, handle, new_inode,
newfe_bh, orphan_name,
- &orphan_insert, orphan_dir);
+ &orphan_insert, orphan_dir, false);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2088,12 +2094,28 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
struct buffer_head *orphan_dir_bh,
u64 blkno,
char *name,
- struct ocfs2_dir_lookup_result *lookup)
+ struct ocfs2_dir_lookup_result *lookup,
+ bool dio)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
+ int namelen = dio ?
+ (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
+ OCFS2_ORPHAN_NAMELEN;
+
+ if (dio) {
+ ret = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
+ OCFS2_DIO_ORPHAN_PREFIX);
+ if (ret != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ return ret;
+ }
- ret = ocfs2_blkno_stringify(blkno, name);
+ ret = ocfs2_blkno_stringify(blkno,
+ name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
+ } else
+ ret = ocfs2_blkno_stringify(blkno, name);
if (ret < 0) {
mlog_errno(ret);
return ret;
@@ -2101,7 +2123,7 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
orphan_dir_bh, name,
- OCFS2_ORPHAN_NAMELEN, lookup);
+ namelen, lookup);
if (ret < 0) {
mlog_errno(ret);
return ret;
@@ -2128,7 +2150,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
struct inode **ret_orphan_dir,
u64 blkno,
char *name,
- struct ocfs2_dir_lookup_result *lookup)
+ struct ocfs2_dir_lookup_result *lookup,
+ bool dio)
{
struct inode *orphan_dir_inode = NULL;
struct buffer_head *orphan_dir_bh = NULL;
@@ -2142,7 +2165,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
}
ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
- blkno, name, lookup);
+ blkno, name, lookup, dio);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -2170,12 +2193,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
char *name,
struct ocfs2_dir_lookup_result *lookup,
- struct inode *orphan_dir_inode)
+ struct inode *orphan_dir_inode,
+ bool dio)
{
struct buffer_head *orphan_dir_bh = NULL;
int status = 0;
struct ocfs2_dinode *orphan_fe;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
+ int namelen = dio ?
+ (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
+ OCFS2_ORPHAN_NAMELEN;
trace_ocfs2_orphan_add_begin(
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2219,7 +2246,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
ocfs2_journal_dirty(handle, orphan_dir_bh);
status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
- OCFS2_ORPHAN_NAMELEN, inode,
+ namelen, inode,
OCFS2_I(inode)->ip_blkno,
orphan_dir_bh, lookup);
if (status < 0) {
@@ -2227,13 +2254,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
goto rollback;
}
- fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
- OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
+ if (dio) {
+ /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan
+ * slot.
+ */
+ fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
+ fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num);
+ } else {
+ fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
+ OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
- /* Record which orphan dir our inode now resides
- * in. delete_inode will use this to determine which orphan
- * dir to lock. */
- fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
+ /* Record which orphan dir our inode now resides
+ * in. delete_inode will use this to determine which orphan
+ * dir to lock. */
+ fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
+ }
ocfs2_journal_dirty(handle, fe_bh);
@@ -2258,14 +2293,28 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
handle_t *handle,
struct inode *orphan_dir_inode,
struct inode *inode,
- struct buffer_head *orphan_dir_bh)
+ struct buffer_head *orphan_dir_bh,
+ bool dio)
{
- char name[OCFS2_ORPHAN_NAMELEN + 1];
+ const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
+ char name[namelen + 1];
struct ocfs2_dinode *orphan_fe;
int status = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
- status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
+ if (dio) {
+ status = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
+ OCFS2_DIO_ORPHAN_PREFIX);
+ if (status != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
+ status = -EINVAL;
+ mlog_errno(status);
+ return status;
+ }
+
+ status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno,
+ name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
+ } else
+ status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -2273,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
trace_ocfs2_orphan_del(
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
- name, OCFS2_ORPHAN_NAMELEN);
+ name, namelen);
/* find it's spot in the orphan directory */
- status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
+ status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
&lookup);
if (status) {
mlog_errno(status);
@@ -2376,7 +2425,8 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir,
}
ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
- di_blkno, orphan_name, orphan_insert);
+ di_blkno, orphan_name, orphan_insert,
+ false);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -2482,7 +2532,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
di = (struct ocfs2_dinode *)new_di_bh->b_data;
status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
- &orphan_insert, orphan_dir);
+ &orphan_insert, orphan_dir, false);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -2527,6 +2577,186 @@ leave:
return status;
}
+static int ocfs2_dio_orphan_recovered(struct inode *inode)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di = NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return 0;
+ }
+
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+ ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+ return ret;
+}
+
+#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
+int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
+ struct inode *inode)
+{
+ char orphan_name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
+ struct inode *orphan_dir_inode = NULL;
+ struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+ struct buffer_head *di_bh = NULL;
+ int status = 0;
+ handle_t *handle = NULL;
+ struct ocfs2_dinode *di = NULL;
+
+restart:
+ status = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+ /*
+ * Another append dio crashed?
+ * If so, wait for recovery first.
+ */
+ if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+ wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
+ ocfs2_dio_orphan_recovered(inode),
+ msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
+ goto restart;
+ }
+
+ status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
+ OCFS2_I(inode)->ip_blkno,
+ orphan_name,
+ &orphan_insert,
+ true);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ handle = ocfs2_start_trans(osb,
+ OCFS2_INODE_ADD_TO_ORPHAN_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ goto bail_unlock_orphan;
+ }
+
+ status = ocfs2_orphan_add(osb, handle, inode, di_bh, orphan_name,
+ &orphan_insert, orphan_dir_inode, true);
+ if (status)
+ mlog_errno(status);
+
+ ocfs2_commit_trans(osb, handle);
+
+bail_unlock_orphan:
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+
+ ocfs2_free_dir_lookup_result(&orphan_insert);
+
+bail_unlock_inode:
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+bail:
+ return status;
+}
+
+int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
+ struct inode *inode, int update_isize,
+ loff_t end)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di = NULL;
+ handle_t *handle = NULL;
+ int status = 0;
+
+ status = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ di = (struct ocfs2_dinode *) di_bh->b_data;
+
+ orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ le16_to_cpu(di->i_dio_orphaned_slot));
+ if (!orphan_dir_inode) {
+ status = -ENOENT;
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ mutex_lock(&orphan_dir_inode->i_mutex);
+ status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+ if (status < 0) {
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+ mlog_errno(status);
+ goto bail_unlock_inode;
+ }
+
+ handle = ocfs2_start_trans(osb,
+ OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ goto bail_unlock_orphan;
+ }
+
+ BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)));
+
+ status = ocfs2_orphan_del(osb, handle, orphan_dir_inode,
+ inode, orphan_dir_bh, true);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(inode),
+ di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
+
+ di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
+ di->i_dio_orphaned_slot = 0;
+
+ if (update_isize) {
+ status = ocfs2_set_inode_size(handle, inode, di_bh, end);
+ if (status)
+ mlog_errno(status);
+ } else
+ ocfs2_journal_dirty(handle, di_bh);
+
+bail_commit:
+ ocfs2_commit_trans(osb, handle);
+
+bail_unlock_orphan:
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ brelse(orphan_dir_bh);
+ iput(orphan_dir_inode);
+
+bail_unlock_inode:
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+
+bail:
+ return status;
+}
+
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
struct inode *inode,
struct dentry *dentry)
@@ -2615,7 +2845,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
}
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
- orphan_dir_bh);
+ orphan_dir_bh, false);
if (status < 0) {
mlog_errno(status);
goto out_commit;
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index e5d059d4f115..5ddecce172fa 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -34,10 +34,16 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
handle_t *handle,
struct inode *orphan_dir_inode,
struct inode *inode,
- struct buffer_head *orphan_dir_bh);
+ struct buffer_head *orphan_dir_bh,
+ bool dio);
int ocfs2_create_inode_in_orphan(struct inode *dir,
int mode,
struct inode **new_inode);
+int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
+ struct inode *inode);
+int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
+ struct inode *inode, int update_isize,
+ loff_t end);
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
struct inode *new_inode,
struct dentry *new_dentry);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fdbcbfed529e..8490c64d34fe 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -209,6 +209,11 @@ struct ocfs2_lock_res {
#endif
};
+enum ocfs2_orphan_reco_type {
+ ORPHAN_NO_NEED_TRUNCATE = 0,
+ ORPHAN_NEED_TRUNCATE,
+};
+
enum ocfs2_orphan_scan_state {
ORPHAN_SCAN_ACTIVE,
ORPHAN_SCAN_INACTIVE
@@ -495,6 +500,14 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
return 0;
}
+static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
+{
+ if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+ return 1;
+ return 0;
+}
+
+
static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
@@ -726,6 +739,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
return clusters;
}
+static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb,
+ u64 bytes)
+{
+ int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
+ unsigned int clusters;
+
+ clusters = (unsigned int)(bytes >> cl_bits);
+ return clusters;
+}
+
static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
u64 bytes)
{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 938387a10d5d..20e37a3ed26f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -105,7 +105,8 @@
| OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
- | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
+ | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
/*
* Heartbeat-only devices are missing journals and other files. The
@@ -199,6 +200,11 @@
#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
+/*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
+
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
*/
@@ -229,6 +235,8 @@
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
+#define OCFS2_DIO_ORPHANED_FL (0X00002000) /* On the orphan list especially
+ * for dio */
/*
* Flags on ocfs2_dinode.i_dyn_features
@@ -729,7 +737,9 @@ struct ocfs2_dinode {
inode belongs to. Only valid
if allocated from a
discontiguous block group */
-/*A0*/ __le64 i_reserved2[3];
+/*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */
+ __le16 i_reserved1[3];
+ __le64 i_reserved2[2];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 87a1f7679d9b..26675185b886 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1746,6 +1746,8 @@ static void ocfs2_inode_init_once(void *data)
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres);
+ init_waitqueue_head(&oi->append_dio_wq);
+
ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
&ocfs2_inode_caching_ops);