aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds2020-06-09 15:48:24 -0700
committerLinus Torvalds2020-06-09 15:48:24 -0700
commit5b14671be58d0084e7e2d1cc9c2c36a94467f6e0 (patch)
treede0d4238d49f55d4ea277b213d52f347a53b2674 /fs
parent52435c86bf0f5c892804912481af7f1a5b95ff2d (diff)
parent9b46418c40fe910e6537618f9932a8be78a3dd6c (diff)
Merge tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: - Fix a rare deadlock in virtiofs - Fix st_blocks in writeback cache mode - Fix wrong checks in splice move causing spurious warnings - Fix a race between a GETATTR request and a FUSE_NOTIFY_INVAL_INODE notification - Use rb-tree instead of linear search for pages currently under writeout by userspace - Fix copy_file_range() inconsistencies * tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: fuse: copy_file_range should truncate cache fuse: fix copy_file_range cache issues fuse: optimize writepages search fuse: update attr_version counter on fuse_notify_inval_inode() fuse: don't check refcount after stealing page fuse: fix weird page warning fuse: use dump_page virtiofs: do not use fuse_fill_super_common() for device installation fuse: always allow query of st_dev fuse: always flush dirty data on close(2) fuse: invalidate inode attr in writeback cache mode fuse: Update stale comment in queue_interrupt() fuse: BUG_ON correction in fuse_dev_splice_write() virtiofs: Add mount option and atime behavior to the doc virtiofs: schedule blocking async replies in separate worker
Diffstat (limited to 'fs')
-rw-r--r--fs/fuse/dev.c14
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c120
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c26
-rw-r--r--fs/fuse/virtio_fs.c115
6 files changed, 205 insertions, 85 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8ccc97356cb5..02b3c36b3676 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -342,7 +342,7 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
list_add_tail(&req->intr_entry, &fiq->interrupts);
/*
* Pairs with smp_mb() implied by test_and_set_bit()
- * from request_end().
+ * from fuse_request_end().
*/
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
@@ -764,16 +764,15 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
- page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
- 1 << PG_reclaim))) {
- pr_warn("trying to steal weird page\n");
- pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+ 1 << PG_reclaim |
+ 1 << PG_waiters))) {
+ dump_page(page, "fuse: trying to steal weird page");
return 1;
}
return 0;
@@ -1977,8 +1976,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;
- BUG_ON(nbuf >= pipe->ring_size);
- BUG_ON(tail == head);
+ if (WARN_ON(nbuf >= count || tail == head))
+ goto out_free;
+
ibuf = &pipe->bufs[tail & mask];
obuf = &bufs[nbuf];
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index de1e2fde60bd..26f028bc760b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1689,8 +1689,18 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
struct inode *inode = d_inode(path->dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_current_process(fc))
+ if (!fuse_allow_current_process(fc)) {
+ if (!request_mask) {
+ /*
+ * If user explicitly requested *nothing* then don't
+ * error out, but return st_dev only.
+ */
+ stat->result_mask = 0;
+ stat->dev = inode->i_sb->s_dev;
+ return 0;
+ }
return -EACCES;
+ }
return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bac51c32d660..e573b0cd2737 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
struct fuse_writepage_args {
struct fuse_io_args ia;
- struct list_head writepages_entry;
+ struct rb_node writepages_entry;
struct list_head queue_entry;
struct fuse_writepage_args *next;
struct inode *inode;
@@ -366,17 +366,23 @@ struct fuse_writepage_args {
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
pgoff_t idx_from, pgoff_t idx_to)
{
- struct fuse_writepage_args *wpa;
+ struct rb_node *n;
+
+ n = fi->writepages.rb_node;
- list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
+ while (n) {
+ struct fuse_writepage_args *wpa;
pgoff_t curr_index;
+ wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from < curr_index + wpa->ia.ap.num_pages &&
- curr_index <= idx_to) {
+ if (idx_from >= curr_index + wpa->ia.ap.num_pages)
+ n = n->rb_right;
+ else if (idx_to < curr_index)
+ n = n->rb_left;
+ else
return wpa;
- }
}
return NULL;
}
@@ -445,9 +451,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (is_bad_inode(inode))
return -EIO;
- if (fc->no_flush)
- return 0;
-
err = write_inode_now(inode, 1);
if (err)
return err;
@@ -460,6 +463,10 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
+ err = 0;
+ if (fc->no_flush)
+ goto inval_attr_out;
+
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -475,6 +482,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
fc->no_flush = 1;
err = 0;
}
+
+inval_attr_out:
+ /*
+ * In memory i_blocks is not maintained by fuse, if writeback cache is
+ * enabled, i_blocks from cached attr may not be accurate.
+ */
+ if (!err && fc->writeback_cache)
+ fuse_invalidate_attr(inode);
return err;
}
@@ -712,6 +727,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
spin_unlock(&io->lock);
ia->ap.args.end = fuse_aio_complete_req;
+ ia->ap.args.may_block = io->should_dirty;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
if (err)
fuse_aio_complete_req(fc, &ia->ap.args, err);
@@ -1570,7 +1586,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- list_del(&wpa->writepages_entry);
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
for (i = 0; i < ap->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1658,6 +1674,36 @@ __acquires(fi->lock)
}
}
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+{
+ pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
+ pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ WARN_ON(!wpa->ia.ap.num_pages);
+ while (*p) {
+ struct fuse_writepage_args *curr;
+ pgoff_t curr_index;
+
+ parent = *p;
+ curr = rb_entry(parent, struct fuse_writepage_args,
+ writepages_entry);
+ WARN_ON(curr->inode != wpa->inode);
+ curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
+
+ if (idx_from >= curr_index + curr->ia.ap.num_pages)
+ p = &(*p)->rb_right;
+ else if (idx_to < curr_index)
+ p = &(*p)->rb_left;
+ else
+ return (void) WARN_ON(true);
+ }
+
+ rb_link_node(&wpa->writepages_entry, parent, p);
+ rb_insert_color(&wpa->writepages_entry, root);
+}
+
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
@@ -1676,7 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
wpa->next = next->next;
next->next = NULL;
next->ia.ff = fuse_file_get(wpa->ia.ff);
- list_add(&next->writepages_entry, &fi->writepages);
+ tree_insert(&fi->writepages, next);
/*
* Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1811,7 +1857,7 @@ static int fuse_writepage_locked(struct page *page)
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
spin_lock(&fi->lock);
- list_add(&wpa->writepages_entry, &fi->writepages);
+ tree_insert(&fi->writepages, wpa);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
@@ -1923,10 +1969,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
WARN_ON(new_ap->num_pages != 0);
spin_lock(&fi->lock);
- list_del(&new_wpa->writepages_entry);
+ rb_erase(&new_wpa->writepages_entry, &fi->writepages);
old_wpa = fuse_find_writeback(fi, page->index, page->index);
if (!old_wpa) {
- list_add(&new_wpa->writepages_entry, &fi->writepages);
+ tree_insert(&fi->writepages, new_wpa);
spin_unlock(&fi->lock);
return false;
}
@@ -2041,7 +2087,7 @@ static int fuse_writepages_fill(struct page *page,
wpa->inode = inode;
spin_lock(&fi->lock);
- list_add(&wpa->writepages_entry, &fi->writepages);
+ tree_insert(&fi->writepages, wpa);
spin_unlock(&fi->lock);
data->wpa = wpa;
@@ -3235,13 +3281,11 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (fc->writeback_cache) {
- inode_lock(inode_in);
- err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
- inode_unlock(inode_in);
- if (err)
- return err;
- }
+ inode_lock(inode_in);
+ err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
+ inode_unlock(inode_in);
+ if (err)
+ return err;
inode_lock(inode_out);
@@ -3249,11 +3293,27 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
- if (fc->writeback_cache) {
- err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
- if (err)
- goto out;
- }
+ /*
+ * Write out dirty pages in the destination file before sending the COPY
+ * request to userspace. After the request is completed, truncate off
+ * pages (including partial ones) from the cache that have been copied,
+ * since these contain stale data at that point.
+ *
+ * This should be mostly correct, but if the COPY writes to partial
+ * pages (at the start or end) and the parts not covered by the COPY are
+ * written through a memory map after calling fuse_writeback_range(),
+ * then these partial page modifications will be lost on truncation.
+ *
+ * It is unlikely that someone would rely on such mixed style
+ * modifications. Yet this does give less guarantees than if the
+ * copying was performed with write(2).
+ *
+ * To fix this a i_mmap_sem style lock could be used to prevent new
+ * faults while the copy is ongoing.
+ */
+ err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
+ if (err)
+ goto out;
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
@@ -3274,6 +3334,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;
+ truncate_inode_pages_range(inode_out->i_mapping,
+ ALIGN_DOWN(pos_out, PAGE_SIZE),
+ ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
+
if (fc->writeback_cache) {
fuse_write_update_size(inode_out, pos_out + outarg.size);
file_update_time(file_out);
@@ -3351,5 +3415,5 @@ void fuse_init_file_inode(struct inode *inode)
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
- INIT_LIST_HEAD(&fi->writepages);
+ fi->writepages = RB_ROOT;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ca344bf71404..740a8a7d7ae6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -111,7 +111,7 @@ struct fuse_inode {
wait_queue_head_t page_waitq;
/* List of writepage requestst (pending or sent) */
- struct list_head writepages;
+ struct rb_root writepages;
};
/* readdir cache (directory only) */
@@ -249,6 +249,7 @@ struct fuse_args {
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
+ bool may_block:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 95d712d44ca1..5b4aebf5821f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -321,6 +321,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
loff_t offset, loff_t len)
{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_inode *fi;
struct inode *inode;
pgoff_t pg_start;
pgoff_t pg_end;
@@ -329,6 +331,11 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
if (!inode)
return -ENOENT;
+ fi = get_fuse_inode(inode);
+ spin_lock(&fi->lock);
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ spin_unlock(&fi->lock);
+
fuse_invalidate_attr(inode);
forget_all_cached_acls(inode);
if (offset >= 0) {
@@ -1113,7 +1120,7 @@ EXPORT_SYMBOL_GPL(fuse_dev_free);
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
- struct fuse_dev *fud;
+ struct fuse_dev *fud = NULL;
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct inode *root;
struct dentry *root_dentry;
@@ -1155,9 +1162,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
if (sb->s_user_ns != &init_user_ns)
sb->s_xattr = fuse_no_acl_xattr_handlers;
- fud = fuse_dev_alloc_install(fc);
- if (!fud)
- goto err;
+ if (ctx->fudptr) {
+ err = -ENOMEM;
+ fud = fuse_dev_alloc_install(fc);
+ if (!fud)
+ goto err;
+ }
fc->dev = sb->s_dev;
fc->sb = sb;
@@ -1191,7 +1201,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
mutex_lock(&fuse_mutex);
err = -EINVAL;
- if (*ctx->fudptr)
+ if (ctx->fudptr && *ctx->fudptr)
goto err_unlock;
err = fuse_ctl_add_conn(fc);
@@ -1200,7 +1210,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
list_add_tail(&fc->entry, &fuse_conn_list);
sb->s_root = root_dentry;
- *ctx->fudptr = fud;
+ if (ctx->fudptr)
+ *ctx->fudptr = fud;
mutex_unlock(&fuse_mutex);
return 0;
@@ -1208,7 +1219,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
mutex_unlock(&fuse_mutex);
dput(root_dentry);
err_dev_free:
- fuse_dev_free(fud);
+ if (fud)
+ fuse_dev_free(fud);
err:
return err;
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index bade74768903..4c4ef5d69298 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -60,6 +60,12 @@ struct virtio_fs_forget {
struct virtio_fs_forget_req req;
};
+struct virtio_fs_req_work {
+ struct fuse_req *req;
+ struct virtio_fs_vq *fsvq;
+ struct work_struct done_work;
+};
+
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
@@ -485,19 +491,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
}
/* Work function for request completion */
+static void virtio_fs_request_complete(struct fuse_req *req,
+ struct virtio_fs_vq *fsvq)
+{
+ struct fuse_pqueue *fpq = &fsvq->fud->pq;
+ struct fuse_conn *fc = fsvq->fud->fc;
+ struct fuse_args *args;
+ struct fuse_args_pages *ap;
+ unsigned int len, i, thislen;
+ struct page *page;
+
+ /*
+ * TODO verify that server properly follows FUSE protocol
+ * (oh.uniq, oh.len)
+ */
+ args = req->args;
+ copy_args_from_argbuf(args, req);
+
+ if (args->out_pages && args->page_zeroing) {
+ len = args->out_args[args->out_numargs - 1].size;
+ ap = container_of(args, typeof(*ap), args);
+ for (i = 0; i < ap->num_pages; i++) {
+ thislen = ap->descs[i].length;
+ if (len < thislen) {
+ WARN_ON(ap->descs[i].offset);
+ page = ap->pages[i];
+ zero_user_segment(page, len, thislen);
+ len = 0;
+ } else {
+ len -= thislen;
+ }
+ }
+ }
+
+ spin_lock(&fpq->lock);
+ clear_bit(FR_SENT, &req->flags);
+ spin_unlock(&fpq->lock);
+
+ fuse_request_end(fc, req);
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_complete_req_work(struct work_struct *work)
+{
+ struct virtio_fs_req_work *w =
+ container_of(work, typeof(*w), done_work);
+
+ virtio_fs_request_complete(w->req, w->fsvq);
+ kfree(w);
+}
+
static void virtio_fs_requests_done_work(struct work_struct *work)
{
struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
done_work);
struct fuse_pqueue *fpq = &fsvq->fud->pq;
- struct fuse_conn *fc = fsvq->fud->fc;
struct virtqueue *vq = fsvq->vq;
struct fuse_req *req;
- struct fuse_args_pages *ap;
struct fuse_req *next;
- struct fuse_args *args;
- unsigned int len, i, thislen;
- struct page *page;
+ unsigned int len;
LIST_HEAD(reqs);
/* Collect completed requests off the virtqueue */
@@ -515,38 +569,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
/* End requests */
list_for_each_entry_safe(req, next, &reqs, list) {
- /*
- * TODO verify that server properly follows FUSE protocol
- * (oh.uniq, oh.len)
- */
- args = req->args;
- copy_args_from_argbuf(args, req);
-
- if (args->out_pages && args->page_zeroing) {
- len = args->out_args[args->out_numargs - 1].size;
- ap = container_of(args, typeof(*ap), args);
- for (i = 0; i < ap->num_pages; i++) {
- thislen = ap->descs[i].length;
- if (len < thislen) {
- WARN_ON(ap->descs[i].offset);
- page = ap->pages[i];
- zero_user_segment(page, len, thislen);
- len = 0;
- } else {
- len -= thislen;
- }
- }
- }
-
- spin_lock(&fpq->lock);
- clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
- spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
- spin_lock(&fsvq->lock);
- dec_in_flight_req(fsvq);
- spin_unlock(&fsvq->lock);
+ /* blocking async request completes in a worker context */
+ if (req->args->may_block) {
+ struct virtio_fs_req_work *w;
+
+ w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
+ INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
+ w->fsvq = fsvq;
+ w->req = req;
+ schedule_work(&w->done_work);
+ } else {
+ virtio_fs_request_complete(req, fsvq);
+ }
}
}
@@ -1067,7 +1103,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
err = -ENOMEM;
/* Allocate fuse_dev for hiprio and notification queues */
- for (i = 0; i < VQ_REQUEST; i++) {
+ for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
fsvq->fud = fuse_dev_alloc();
@@ -1075,18 +1111,15 @@ static int virtio_fs_fill_super(struct super_block *sb)
goto err_free_fuse_devs;
}
- ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
+ /* virtiofs allocates and installs its own fuse devices */
+ ctx.fudptr = NULL;
err = fuse_fill_super_common(sb, &ctx);
if (err < 0)
goto err_free_fuse_devs;
- fc = fs->vqs[VQ_REQUEST].fud->fc;
-
for (i = 0; i < fs->nvqs; i++) {
struct virtio_fs_vq *fsvq = &fs->vqs[i];
- if (i == VQ_REQUEST)
- continue; /* already initialized */
fuse_dev_install(fsvq->fud, fc);
}