aboutsummaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/Kconfig2
-rw-r--r--drivers/vhost/net.c32
-rw-r--r--drivers/vhost/scsi.c9
-rw-r--r--drivers/vhost/vhost.c236
-rw-r--r--drivers/vhost/vhost.h11
-rw-r--r--drivers/vhost/vringh.c5
-rw-r--r--drivers/vhost/vsock.c41
7 files changed, 243 insertions, 93 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 40764ecad9ce..cfdecea5078f 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -1,6 +1,6 @@
config VHOST_NET
tristate "Host kernel accelerator for virtio net"
- depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
+ depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
select VHOST
---help---
This kernel module can be loaded in host kernel to accelerate
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 5dc128a8da83..9b519897cc17 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -17,6 +17,8 @@
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/slab.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/net.h>
@@ -24,6 +26,7 @@
#include <linux/if_arp.h>
#include <linux/if_tun.h>
#include <linux/if_macvlan.h>
+#include <linux/if_tap.h>
#include <linux/if_vlan.h>
#include <net/sock.h>
@@ -342,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
endtime = busy_clock() + vq->busyloop_timeout;
while (vhost_can_busy_poll(vq->dev, endtime) &&
vhost_vq_avail_empty(vq->dev, vq))
- cpu_relax_lowlatency();
+ cpu_relax();
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
@@ -351,6 +354,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
return r;
}
+static bool vhost_exceeds_maxpend(struct vhost_net *net)
+{
+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+ struct vhost_virtqueue *vq = &nvq->vq;
+
+ return (nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV
+ == nvq->done_idx;
+}
+
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
@@ -394,8 +406,7 @@ static void handle_tx(struct vhost_net *net)
/* If more outstanding DMAs, queue the work.
* Handle upend_idx wrap around
*/
- if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
- % UIO_MAXIOV == nvq->done_idx))
+ if (unlikely(vhost_exceeds_maxpend(net)))
break;
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
@@ -454,6 +465,16 @@ static void handle_tx(struct vhost_net *net)
msg.msg_control = NULL;
ubufs = NULL;
}
+
+ total_len += len;
+ if (total_len < VHOST_NET_WEIGHT &&
+ !vhost_vq_avail_empty(&net->dev, vq) &&
+ likely(!vhost_exceeds_maxpend(net))) {
+ msg.msg_flags |= MSG_MORE;
+ } else {
+ msg.msg_flags &= ~MSG_MORE;
+ }
+
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
@@ -472,7 +493,6 @@ static void handle_tx(struct vhost_net *net)
vhost_add_used_and_signal(&net->dev, vq, head, 0);
else
vhost_zerocopy_signal_used(net, vq);
- total_len += len;
vhost_net_tx_packet(net);
if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
vhost_poll_queue(&vq->poll);
@@ -533,7 +553,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
while (vhost_can_busy_poll(&net->dev, endtime) &&
!sk_has_rx_data(sk) &&
vhost_vq_avail_empty(&net->dev, vq))
- cpu_relax_lowlatency();
+ cpu_relax();
preempt_enable();
@@ -943,7 +963,7 @@ static struct socket *get_tap_socket(int fd)
sock = tun_get_socket(file);
if (!IS_ERR(sock))
return sock;
- sock = macvtap_get_socket(file);
+ sock = tap_get_socket(file);
if (IS_ERR(sock))
fput(file);
return sock;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 6e29d053843d..fd6c8b66f06f 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -843,7 +843,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
struct iov_iter out_iter, in_iter, prot_iter, data_iter;
u64 tag;
u32 exp_data_len, data_direction;
- unsigned out, in;
+ unsigned int out = 0, in = 0;
int head, ret, prot_bytes;
size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
size_t out_size, in_size;
@@ -922,8 +922,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
*/
iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
- ret = copy_from_iter(req, req_size, &out_iter);
- if (unlikely(ret != req_size)) {
+ if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
vq_err(vq, "Faulted on copy_from_iter\n");
vhost_scsi_send_bad_target(vs, vq, head, out);
continue;
@@ -1749,7 +1748,6 @@ out:
static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
const char *name)
{
- struct se_portal_group *se_tpg;
struct vhost_scsi_nexus *tv_nexus;
mutex_lock(&tpg->tv_tpg_mutex);
@@ -1758,7 +1756,6 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
pr_debug("tpg->tpg_nexus already exists\n");
return -EEXIST;
}
- se_tpg = &tpg->se_tpg;
tv_nexus = kzalloc(sizeof(struct vhost_scsi_nexus), GFP_KERNEL);
if (!tv_nexus) {
@@ -2090,7 +2087,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
NULL,
};
-static struct target_core_fabric_ops vhost_scsi_ops = {
+static const struct target_core_fabric_ops vhost_scsi_ops = {
.module = THIS_MODULE,
.name = "vhost",
.get_fabric_name = vhost_scsi_get_fabric_name,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89c0e97..f0ba362d4c10 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -27,6 +27,8 @@
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
#include <linux/interval_tree_generic.h>
#include "vhost.h"
@@ -49,7 +51,7 @@ enum {
INTERVAL_TREE_DEFINE(struct vhost_umem_node,
rb, __u64, __subtree_last,
- START, LAST, , vhost_umem_interval_tree);
+ START, LAST, static inline, vhost_umem_interval_tree);
#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
@@ -130,14 +132,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
- if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
- vq->is_le = true;
+ vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
+ || virtio_legacy_is_little_endian();
}
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
static void vhost_reset_is_le(struct vhost_virtqueue *vq)
{
- vq->is_le = virtio_legacy_is_little_endian();
+ vhost_init_is_le(vq);
}
struct vhost_flush_struct {
@@ -261,8 +263,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
/* We can only add the work to the list after we're
* sure it was not in the list.
+ * test_and_set_bit() implies a memory barrier.
*/
- smp_mb();
llist_add(&work->node, &dev->work_list);
wake_up_process(dev->worker);
}
@@ -282,6 +284,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);
+static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
+{
+ int j;
+
+ for (j = 0; j < VHOST_NUM_ADDRS; j++)
+ vq->meta_iotlb[j] = NULL;
+}
+
+static void vhost_vq_meta_reset(struct vhost_dev *d)
+{
+ int i;
+
+ for (i = 0; i < d->nvqs; ++i)
+ __vhost_vq_meta_reset(d->vqs[i]);
+}
+
static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
@@ -290,6 +308,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->avail = NULL;
vq->used = NULL;
vq->last_avail_idx = 0;
+ vq->last_used_event = 0;
vq->avail_idx = 0;
vq->last_used_idx = 0;
vq->signalled_used = 0;
@@ -311,6 +330,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
+ __vhost_vq_meta_reset(vq);
}
static int vhost_worker(void *data)
@@ -690,6 +710,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
return 1;
}
+static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
+ u64 addr, unsigned int size,
+ int type)
+{
+ const struct vhost_umem_node *node = vq->meta_iotlb[type];
+
+ if (!node)
+ return NULL;
+
+ return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
+}
+
/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -719,7 +751,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
struct iovec iov[], int iov_size, int access);
-static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
+static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
const void *from, unsigned size)
{
int ret;
@@ -732,8 +764,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
- /* TODO: more fast path */
struct iov_iter t;
+ void __user *uaddr = vhost_vq_meta_fetch(vq,
+ (u64)(uintptr_t)to, size,
+ VHOST_ADDR_DESC);
+
+ if (uaddr)
+ return __copy_to_user(uaddr, from, size);
+
ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_WO);
@@ -749,7 +787,7 @@ out:
}
static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
- void *from, unsigned size)
+ void __user *from, unsigned size)
{
int ret;
@@ -761,8 +799,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
* could be access through iotlb. So -EAGAIN should
* not happen in this case.
*/
- /* TODO: more fast path */
+ void __user *uaddr = vhost_vq_meta_fetch(vq,
+ (u64)(uintptr_t)from, size,
+ VHOST_ADDR_DESC);
struct iov_iter f;
+
+ if (uaddr)
+ return __copy_from_user(to, uaddr, size);
+
ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
@@ -782,17 +826,12 @@ out:
return ret;
}
-static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
- void *addr, unsigned size)
+static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
+ void __user *addr, unsigned int size,
+ int type)
{
int ret;
- /* This function should be called after iotlb
- * prefetch, which means we're sure that vq
- * could be access through iotlb. So -EAGAIN should
- * not happen in this case.
- */
- /* TODO: more fast path */
ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
ARRAY_SIZE(vq->iotlb_iov),
VHOST_ACCESS_RO);
@@ -813,14 +852,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
return vq->iotlb_iov[0].iov_base;
}
-#define vhost_put_user(vq, x, ptr) \
+/* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+ void *addr, unsigned int size,
+ int type)
+{
+ void __user *uaddr = vhost_vq_meta_fetch(vq,
+ (u64)(uintptr_t)addr, size, type);
+ if (uaddr)
+ return uaddr;
+
+ return __vhost_get_user_slow(vq, addr, size, type);
+}
+
+#define vhost_put_user(vq, x, ptr) \
({ \
int ret = -EFAULT; \
if (!vq->iotlb) { \
ret = __put_user(x, ptr); \
} else { \
__typeof__(ptr) to = \
- (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+ sizeof(*ptr), VHOST_ADDR_USED); \
if (to != NULL) \
ret = __put_user(x, to); \
else \
@@ -829,14 +886,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
ret; \
})
-#define vhost_get_user(vq, x, ptr) \
+#define vhost_get_user(vq, x, ptr, type) \
({ \
int ret; \
if (!vq->iotlb) { \
ret = __get_user(x, ptr); \
} else { \
__typeof__(ptr) from = \
- (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+ (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+ sizeof(*ptr), \
+ type); \
if (from != NULL) \
ret = __get_user(x, from); \
else \
@@ -845,6 +904,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
ret; \
})
+#define vhost_get_avail(vq, x, ptr) \
+ vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used(vq, x, ptr) \
+ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
+
static void vhost_dev_lock_vqs(struct vhost_dev *d)
{
int i = 0;
@@ -934,8 +999,8 @@ static int umem_access_ok(u64 uaddr, u64 size, int access)
return 0;
}
-int vhost_process_iotlb_msg(struct vhost_dev *dev,
- struct vhost_iotlb_msg *msg)
+static int vhost_process_iotlb_msg(struct vhost_dev *dev,
+ struct vhost_iotlb_msg *msg)
{
int ret = 0;
@@ -950,6 +1015,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
ret = -EFAULT;
break;
}
+ vhost_vq_meta_reset(dev);
if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
msg->iova + msg->size - 1,
msg->uaddr, msg->perm)) {
@@ -959,6 +1025,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
vhost_iotlb_notify_vq(dev, msg);
break;
case VHOST_IOTLB_INVALIDATE:
+ vhost_vq_meta_reset(dev);
vhost_del_umem_range(dev->iotlb, msg->iova,
msg->iova + msg->size - 1);
break;
@@ -1102,12 +1169,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
sizeof *used + num * sizeof *used->ring + s);
}
+static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
+ const struct vhost_umem_node *node,
+ int type)
+{
+ int access = (type == VHOST_ADDR_USED) ?
+ VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+
+ if (likely(node->perm & access))
+ vq->meta_iotlb[type] = node;
+}
+
static int iotlb_access_ok(struct vhost_virtqueue *vq,
- int access, u64 addr, u64 len)
+ int access, u64 addr, u64 len, int type)
{
const struct vhost_umem_node *node;
struct vhost_umem *umem = vq->iotlb;
- u64 s = 0, size;
+ u64 s = 0, size, orig_addr = addr;
+
+ if (vhost_vq_meta_fetch(vq, addr, len, type))
+ return true;
while (len > s) {
node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1124,6 +1205,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
}
size = node->size - addr + node->start;
+
+ if (orig_addr == addr && size >= len)
+ vhost_vq_meta_update(vq, node, type);
+
s += size;
addr += size;
}
@@ -1140,13 +1225,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
return 1;
return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
- num * sizeof *vq->desc) &&
+ num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
sizeof *vq->avail +
- num * sizeof *vq->avail->ring + s) &&
+ num * sizeof(*vq->avail->ring) + s,
+ VHOST_ADDR_AVAIL) &&
iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
sizeof *vq->used +
- num * sizeof *vq->used->ring + s);
+ num * sizeof(*vq->used->ring) + s,
+ VHOST_ADDR_USED);
}
EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
@@ -1324,7 +1411,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
r = -EINVAL;
break;
}
- vq->last_avail_idx = s.num;
+ vq->last_avail_idx = vq->last_used_event = s.num;
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
break;
@@ -1713,10 +1800,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
int r;
bool is_le = vq->is_le;
- if (!vq->private_data) {
- vhost_reset_is_le(vq);
+ if (!vq->private_data)
return 0;
- }
vhost_init_is_le(vq);
@@ -1729,7 +1814,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
r = -EFAULT;
goto err;
}
- r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+ r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
if (r) {
vq_err(vq, "Can't access used idx at %p\n",
&vq->used->idx);
@@ -1862,8 +1947,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
i, count);
return -EINVAL;
}
- if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) !=
- sizeof(desc))) {
+ if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
return -EINVAL;
@@ -1932,29 +2016,36 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* Check it isn't doing very strange things with descriptor numbers. */
last_avail_idx = vq->last_avail_idx;
- if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
- vq_err(vq, "Failed to access avail idx at %p\n",
- &vq->avail->idx);
- return -EFAULT;
- }
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
- if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
- last_avail_idx, vq->avail_idx);
- return -EFAULT;
- }
+ if (vq->avail_idx == vq->last_avail_idx) {
+ if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
+ vq_err(vq, "Failed to access avail idx at %p\n",
+ &vq->avail->idx);
+ return -EFAULT;
+ }
+ vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
- /* If there's nothing new since last we looked, return invalid. */
- if (vq->avail_idx == last_avail_idx)
- return vq->num;
+ if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ last_avail_idx, vq->avail_idx);
+ return -EFAULT;
+ }
- /* Only get avail ring entries after they have been exposed by guest. */
- smp_rmb();
+ /* If there's nothing new since last we looked, return
+ * invalid.
+ */
+ if (vq->avail_idx == last_avail_idx)
+ return vq->num;
+
+ /* Only get avail ring entries after they have been
+ * exposed by guest.
+ */
+ smp_rmb();
+ }
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- if (unlikely(vhost_get_user(vq, ring_head,
+ if (unlikely(vhost_get_avail(vq, ring_head,
&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
@@ -2159,10 +2250,6 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
__u16 old, new;
__virtio16 event;
bool v;
- /* Flush out used index updates. This is paired
- * with the barrier that the Guest executes when enabling
- * interrupts. */
- smp_mb();
if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
unlikely(vq->avail_idx == vq->last_avail_idx))
@@ -2170,7 +2257,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
__virtio16 flags;
- if (vhost_get_user(vq, flags, &vq->avail->flags)) {
+ /* Flush out used index updates. This is paired
+ * with the barrier that the Guest executes when enabling
+ * interrupts. */
+ smp_mb();
+ if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return true;
}
@@ -2184,11 +2275,26 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (unlikely(!v))
return true;
- if (vhost_get_user(vq, event, vhost_used_event(vq))) {
+ /* We're sure if the following conditions are met, there's no
+ * need to notify guest:
+ * 1) cached used event is ahead of new
+ * 2) old to new updating does not cross cached used event. */
+ if (vring_need_event(vq->last_used_event, new + vq->num, new) &&
+ !vring_need_event(vq->last_used_event, new, old))
+ return false;
+
+ /* Flush out used index updates. This is paired
+ * with the barrier that the Guest executes when enabling
+ * interrupts. */
+ smp_mb();
+
+ if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
vq_err(vq, "Failed to get used event idx");
return true;
}
- return vring_need_event(vhost16_to_cpu(vq, event), new, old);
+ vq->last_used_event = vhost16_to_cpu(vq, event);
+
+ return vring_need_event(vq->last_used_event, new, old);
}
/* This actually signals the guest, using eventfd. */
@@ -2226,11 +2332,15 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
__virtio16 avail_idx;
int r;
- r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
- if (r)
+ if (vq->avail_idx != vq->last_avail_idx)
+ return false;
+
+ r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
+ if (unlikely(r))
return false;
+ vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
- return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx;
+ return vq->avail_idx == vq->last_avail_idx;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
@@ -2261,7 +2371,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+ r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
if (r) {
vq_err(vq, "Failed to check avail idx at %p: %d\n",
&vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 78f3c5fc02e4..f55671d53f28 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,6 +76,13 @@ struct vhost_umem {
int numem;
};
+enum vhost_uaddr_type {
+ VHOST_ADDR_DESC = 0,
+ VHOST_ADDR_AVAIL = 1,
+ VHOST_ADDR_USED = 2,
+ VHOST_NUM_ADDRS = 3,
+};
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
+ const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct file *call;
struct file *error;
@@ -107,6 +115,9 @@ struct vhost_virtqueue {
/* Last index we used. */
u16 last_used_idx;
+ /* Last used evet we've seen */
+ u16 last_used_event;
+
/* Used flags */
u16 used_flags;
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 3bb02c60a2f5..bb8971f2a634 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -3,6 +3,7 @@
*
* Since these may be in userspace, we use (inline) accessors.
*/
+#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
@@ -820,13 +821,13 @@ EXPORT_SYMBOL(vringh_need_notify_user);
static inline int getu16_kern(const struct vringh *vrh,
u16 *val, const __virtio16 *p)
{
- *val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
+ *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
return 0;
}
static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
- ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
+ WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
return 0;
}
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a504e2e003da..ce5e63d2c66a 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -50,11 +50,10 @@ static u32 vhost_transport_get_local_cid(void)
return VHOST_VSOCK_DEFAULT_HOST_CID;
}
-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
{
struct vhost_vsock *vsock;
- spin_lock_bh(&vhost_vsock_lock);
list_for_each_entry(vsock, &vhost_vsock_list, list) {
u32 other_cid = vsock->guest_cid;
@@ -63,15 +62,24 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
continue;
if (other_cid == guest_cid) {
- spin_unlock_bh(&vhost_vsock_lock);
return vsock;
}
}
- spin_unlock_bh(&vhost_vsock_lock);
return NULL;
}
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+ struct vhost_vsock *vsock;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ vsock = __vhost_vsock_get(guest_cid);
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ return vsock;
+}
+
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
@@ -195,7 +203,6 @@ static int
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
struct vhost_vsock *vsock;
- struct vhost_virtqueue *vq;
int len = pkt->len;
/* Find the vhost_vsock according to guest context id */
@@ -205,8 +212,6 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
return -ENODEV;
}
- vq = &vsock->vqs[VSOCK_VQ_RX];
-
if (pkt->reply)
atomic_inc(&vsock->queued_replies);
@@ -368,6 +373,7 @@ static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
+ struct vhost_virtqueue *vq;
size_t i;
int ret;
@@ -378,19 +384,20 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
goto err;
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
- struct vhost_virtqueue *vq = &vsock->vqs[i];
+ vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
if (!vhost_vq_access_ok(vq)) {
ret = -EFAULT;
- mutex_unlock(&vq->mutex);
goto err_vq;
}
if (!vq->private_data) {
vq->private_data = vsock;
- vhost_vq_init_access(vq);
+ ret = vhost_vq_init_access(vq);
+ if (ret)
+ goto err_vq;
}
mutex_unlock(&vq->mutex);
@@ -400,8 +407,11 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
return 0;
err_vq:
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
- struct vhost_virtqueue *vq = &vsock->vqs[i];
+ vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
vq->private_data = NULL;
@@ -562,11 +572,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
return -EINVAL;
/* Refuse if CID is already in use */
- other = vhost_vsock_get(guest_cid);
- if (other && other != vsock)
- return -EADDRINUSE;
-
spin_lock_bh(&vhost_vsock_lock);
+ other = __vhost_vsock_get(guest_cid);
+ if (other && other != vsock) {
+ spin_unlock_bh(&vhost_vsock_lock);
+ return -EADDRINUSE;
+ }
vsock->guest_cid = guest_cid;
spin_unlock_bh(&vhost_vsock_lock);