fs/vboxsf/file.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396

// SPDX-License-Identifier: MIT
/*
 * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
 *
 * Copyright (C) 2006-2018 Oracle Corporation
 */

#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sizes.h>
#include "vfsmod.h"

struct vboxsf_handle {
	u64 handle;
	u32 root;
	u32 access_flags;
	struct kref refcount;
	struct list_head head;
};

struct vboxsf_handle *vboxsf_create_sf_handle(struct inode *inode,
					      u64 handle, u32 access_flags)
{
	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
	struct vboxsf_handle *sf_handle;

	sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
	if (!sf_handle)
		return ERR_PTR(-ENOMEM);

	/* the host may have given us different attr then requested */
	sf_i->force_restat = 1;

	/* init our handle struct and add it to the inode's handles list */
	sf_handle->handle = handle;
	sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
	sf_handle->access_flags = access_flags;
	kref_init(&sf_handle->refcount);

	mutex_lock(&sf_i->handle_list_mutex);
	list_add(&sf_handle->head, &sf_i->handle_list);
	mutex_unlock(&sf_i->handle_list_mutex);

	return sf_handle;
}

static int vboxsf_file_open(struct inode *inode, struct file *file)
{
	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
	struct shfl_createparms params = {};
	struct vboxsf_handle *sf_handle;
	u32 access_flags = 0;
	int err;

	/*
	 * We check the value of params.handle afterwards to find out if
	 * the call succeeded or failed, as the API does not seem to cleanly
	 * distinguish error and informational messages.
	 *
	 * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
	 * make the shared folders host service use our mode parameter.
	 */
	params.handle = SHFL_HANDLE_NIL;
	if (file->f_flags & O_CREAT) {
		params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
		/*
		 * We ignore O_EXCL, as the Linux kernel seems to call create
		 * beforehand itself, so O_EXCL should always fail.
		 */
		if (file->f_flags & O_TRUNC)
			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
		else
			params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
	} else {
		params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
		if (file->f_flags & O_TRUNC)
			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
	}

	switch (file->f_flags & O_ACCMODE) {
	case O_RDONLY:
		access_flags |= SHFL_CF_ACCESS_READ;
		break;

	case O_WRONLY:
		access_flags |= SHFL_CF_ACCESS_WRITE;
		break;

	case O_RDWR:
		access_flags |= SHFL_CF_ACCESS_READWRITE;
		break;

	default:
		WARN_ON(1);
	}

	if (file->f_flags & O_APPEND)
		access_flags |= SHFL_CF_ACCESS_APPEND;

	params.create_flags |= access_flags;
	params.info.attr.mode = inode->i_mode;

	err = vboxsf_create_at_dentry(file_dentry(file), &params);
	if (err == 0 && params.handle == SHFL_HANDLE_NIL)
		err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
	if (err)
		return err;

	sf_handle = vboxsf_create_sf_handle(inode, params.handle, access_flags);
	if (IS_ERR(sf_handle)) {
		vboxsf_close(sbi->root, params.handle);
		return PTR_ERR(sf_handle);
	}

	file->private_data = sf_handle;
	return 0;
}

static void vboxsf_handle_release(struct kref *refcount)
{
	struct vboxsf_handle *sf_handle =
		container_of(refcount, struct vboxsf_handle, refcount);

	vboxsf_close(sf_handle->root, sf_handle->handle);
	kfree(sf_handle);
}

void vboxsf_release_sf_handle(struct inode *inode, struct vboxsf_handle *sf_handle)
{
	struct vboxsf_inode *sf_i = VBOXSF_I(inode);

	mutex_lock(&sf_i->handle_list_mutex);
	list_del(&sf_handle->head);
	mutex_unlock(&sf_i->handle_list_mutex);

	kref_put(&sf_handle->refcount, vboxsf_handle_release);
}

static int vboxsf_file_release(struct inode *inode, struct file *file)
{
	/*
	 * When a file is closed on our (the guest) side, we want any subsequent
	 * accesses done on the host side to see all changes done from our side.
	 */
	filemap_write_and_wait(inode->i_mapping);

	vboxsf_release_sf_handle(inode, file->private_data);
	return 0;
}

/*
 * Write back dirty pages now, because there may not be any suitable
 * open files later
 */
static void vboxsf_vma_close(struct vm_area_struct *vma)
{
	filemap_write_and_wait(vma->vm_file->f_mapping);
}

static const struct vm_operations_struct vboxsf_file_vm_ops = {
	.close		= vboxsf_vma_close,
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
};

static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int err;

	err = generic_file_mmap(file, vma);
	if (!err)
		vma->vm_ops = &vboxsf_file_vm_ops;

	return err;
}

/*
 * Note that since we are accessing files on the host's filesystem, files
 * may always be changed underneath us by the host!
 *
 * The vboxsf API between the guest and the host does not offer any functions
 * to deal with this. There is no inode-generation to check for changes, no
 * events / callback on changes and no way to lock files.
 *
 * To avoid returning stale data when a file gets *opened* on our (the guest)
 * side, we do a "stat" on the host side, then compare the mtime with the
 * last known mtime and invalidate the page-cache if they differ.
 * This is done from vboxsf_inode_revalidate().
 *
 * When reads are done through the read_iter fop, it is possible to do
 * further cache revalidation then, there are 3 options to deal with this:
 *
 * 1)  Rely solely on the revalidation done at open time
 * 2)  Do another "stat" and compare mtime again. Unfortunately the vboxsf
 *     host API does not allow stat on handles, so we would need to use
 *     file->f_path.dentry and the stat will then fail if the file was unlinked
 *     or renamed (and there is no thing like NFS' silly-rename). So we get:
 * 2a) "stat" and compare mtime, on stat failure invalidate the cache
 * 2b) "stat" and compare mtime, on stat failure do nothing
 * 3)  Simply always call invalidate_inode_pages2_range on the range of the read
 *
 * Currently we are keeping things KISS and using option 1. this allows
 * directly using generic_file_read_iter without wrapping it.
 *
 * This means that only data written on the host side before open() on
 * the guest side is guaranteed to be seen by the guest. If necessary
 * we may provide other read-cache strategies in the future and make this
 * configurable through a mount option.
 */
const struct file_operations vboxsf_reg_fops = {
	.llseek = generic_file_llseek,
	.read_iter = generic_file_read_iter,
	.write_iter = generic_file_write_iter,
	.mmap = vboxsf_file_mmap,
	.open = vboxsf_file_open,
	.release = vboxsf_file_release,
	.fsync = noop_fsync,
	.splice_read = generic_file_splice_read,
};

const struct inode_operations vboxsf_reg_iops = {
	.getattr = vboxsf_getattr,
	.setattr = vboxsf_setattr
};

static int vboxsf_readpage(struct file *file, struct page *page)
{
	struct vboxsf_handle *sf_handle = file->private_data;
	loff_t off = page_offset(page);
	u32 nread = PAGE_SIZE;
	u8 *buf;
	int err;

	buf = kmap(page);

	err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
	if (err == 0) {
		memset(&buf[nread], 0, PAGE_SIZE - nread);
		flush_dcache_page(page);
		SetPageUptodate(page);
	} else {
		SetPageError(page);
	}

	kunmap(page);
	unlock_page(page);
	return err;
}

static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
{
	struct vboxsf_handle *h, *sf_handle = NULL;

	mutex_lock(&sf_i->handle_list_mutex);
	list_for_each_entry(h, &sf_i->handle_list, head) {
		if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
		    h->access_flags == SHFL_CF_ACCESS_READWRITE) {
			kref_get(&h->refcount);
			sf_handle = h;
			break;
		}
	}
	mutex_unlock(&sf_i->handle_list_mutex);

	return sf_handle;
}

static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
	struct vboxsf_handle *sf_handle;
	loff_t off = page_offset(page);
	loff_t size = i_size_read(inode);
	u32 nwrite = PAGE_SIZE;
	u8 *buf;
	int err;

	if (off + PAGE_SIZE > size)
		nwrite = size & ~PAGE_MASK;

	sf_handle = vboxsf_get_write_handle(sf_i);
	if (!sf_handle)
		return -EBADF;

	buf = kmap(page);
	err = vboxsf_write(sf_handle->root, sf_handle->handle,
			   off, &nwrite, buf);
	kunmap(page);

	kref_put(&sf_handle->refcount, vboxsf_handle_release);

	if (err == 0) {
		ClearPageError(page);
		/* mtime changed */
		sf_i->force_restat = 1;
	} else {
		ClearPageUptodate(page);
	}

	unlock_page(page);
	return err;
}

static int vboxsf_write_end(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned int len, unsigned int copied,
			    struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	struct vboxsf_handle *sf_handle = file->private_data;
	unsigned int from = pos & ~PAGE_MASK;
	u32 nwritten = len;
	u8 *buf;
	int err;

	/* zero the stale part of the page if we did a short copy */
	if (!PageUptodate(page) && copied < len)
		zero_user(page, from + copied, len - copied);

	buf = kmap(page);
	err = vboxsf_write(sf_handle->root, sf_handle->handle,
			   pos, &nwritten, buf + from);
	kunmap(page);

	if (err) {
		nwritten = 0;
		goto out;
	}

	/* mtime changed */
	VBOXSF_I(inode)->force_restat = 1;

	if (!PageUptodate(page) && nwritten == PAGE_SIZE)
		SetPageUptodate(page);

	pos += nwritten;
	if (pos > inode->i_size)
		i_size_write(inode, pos);

out:
	unlock_page(page);
	put_page(page);

	return nwritten;
}

/*
 * Note simple_write_begin does not read the page from disk on partial writes
 * this is ok since vboxsf_write_end only writes the written parts of the
 * page and it does not call SetPageUptodate for partial writes.
 */
const struct address_space_operations vboxsf_reg_aops = {
	.readpage = vboxsf_readpage,
	.writepage = vboxsf_writepage,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.write_begin = simple_write_begin,
	.write_end = vboxsf_write_end,
};

static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
				   struct delayed_call *done)
{
	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
	struct shfl_string *path;
	char *link;
	int err;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	path = vboxsf_path_from_dentry(sbi, dentry);
	if (IS_ERR(path))
		return ERR_CAST(path);

	link = kzalloc(PATH_MAX, GFP_KERNEL);
	if (!link) {
		__putname(path);
		return ERR_PTR(-ENOMEM);
	}

	err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
	__putname(path);
	if (err) {
		kfree(link);
		return ERR_PTR(err);
	}

	set_delayed_call(done, kfree_link, link);
	return link;
}

const struct inode_operations vboxsf_lnk_iops = {
	.get_link = vboxsf_get_link
};