Diffstat (limited to 'drivers/vhost')
-rw-r--r--   drivers/vhost/Kconfig  |   2
-rw-r--r--   drivers/vhost/net.c    |  32
-rw-r--r--   drivers/vhost/scsi.c   |   9
-rw-r--r--   drivers/vhost/vhost.c  | 236
-rw-r--r--   drivers/vhost/vhost.h  |  11
-rw-r--r--   drivers/vhost/vringh.c |   5
-rw-r--r--   drivers/vhost/vsock.c  |  41
7 files changed, 243 insertions, 93 deletions
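Two recurring changes in the vhost.c hunks below are worth flagging up front. First, each virtqueue now caches the IOTLB translation of its descriptor, avail, and used rings in vq->meta_iotlb, and the new vhost_get_avail()/vhost_get_used() helpers hit that cache instead of walking the interval tree; IOTLB updates and invalidations reset it. Second, vhost_notify() records the guest's used event index in vq->last_used_event and skips the re-read of used_event from guest memory when a notification is provably unneeded. The skip test builds on vring_need_event() from include/uapi/linux/virtio_ring.h; here is a minimal userspace sketch of that logic (plain uint16_t stands in for the kernel's __virtio16 handling, and can_skip_notify() is an illustrative name, not a kernel function):

#include <stdint.h>
#include <stdio.h>

/* Same index comparison as vring_need_event() in
 * include/uapi/linux/virtio_ring.h: does the window (old, new_idx]
 * contain event_idx, using mod-2^16 ring arithmetic? */
static int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
        return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}

/* Mirrors the new check in vhost_notify(): skip the notification when
 * 1) the cached used event is at most num entries ahead of new, and
 * 2) publishing old..new did not cross the cached used event. */
static int can_skip_notify(uint16_t cached_event, uint16_t old,
                           uint16_t new_idx, uint16_t num)
{
        return vring_need_event(cached_event, (uint16_t)(new_idx + num), new_idx) &&
               !vring_need_event(cached_event, new_idx, old);
}

int main(void)
{
        /* Guest armed used_event = 10; host moved the used index 5 -> 8.
         * The armed event has not been reached, so skipping is safe. */
        printf("%d\n", can_skip_notify(10, 5, 8, 256));  /* prints 1 */
        /* Moving 5 -> 12 crosses the cached event, so the test fails and
         * vhost falls through to re-read used_event from guest memory. */
        printf("%d\n", can_skip_notify(10, 5, 12, 256)); /* prints 0 */
        return 0;
}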
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 40764ecad9ce..cfdecea5078f 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -1,6 +1,6 @@
 config VHOST_NET
         tristate "Host kernel accelerator for virtio net"
-        depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
+        depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
         select VHOST
         ---help---
           This kernel module can be loaded in host kernel to accelerate
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 5dc128a8da83..9b519897cc17 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -17,6 +17,8 @@
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
 #include <linux/vmalloc.h>
 
 #include <linux/net.h>
@@ -24,6 +26,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_tun.h>
 #include <linux/if_macvlan.h>
+#include <linux/if_tap.h>
 #include <linux/if_vlan.h>
 
 #include <net/sock.h>
@@ -342,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
                 endtime = busy_clock() + vq->busyloop_timeout;
                 while (vhost_can_busy_poll(vq->dev, endtime) &&
                        vhost_vq_avail_empty(vq->dev, vq))
-                        cpu_relax_lowlatency();
+                        cpu_relax();
                 preempt_enable();
                 r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                       out_num, in_num, NULL, NULL);
@@ -351,6 +354,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
         return r;
 }
 
+static bool vhost_exceeds_maxpend(struct vhost_net *net)
+{
+        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+        struct vhost_virtqueue *vq = &nvq->vq;
+
+        return (nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV
+                == nvq->done_idx;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
@@ -394,8 +406,7 @@ static void handle_tx(struct vhost_net *net)
                 /* If more outstanding DMAs, queue the work.
                  * Handle upend_idx wrap around
                  */
-                if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
-                             % UIO_MAXIOV == nvq->done_idx))
+                if (unlikely(vhost_exceeds_maxpend(net)))
                         break;
 
                 head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
@@ -454,6 +465,16 @@ static void handle_tx(struct vhost_net *net)
                         msg.msg_control = NULL;
                         ubufs = NULL;
                 }
+
+                total_len += len;
+                if (total_len < VHOST_NET_WEIGHT &&
+                    !vhost_vq_avail_empty(&net->dev, vq) &&
+                    likely(!vhost_exceeds_maxpend(net))) {
+                        msg.msg_flags |= MSG_MORE;
+                } else {
+                        msg.msg_flags &= ~MSG_MORE;
+                }
+
                 /* TODO: Check specific error and bomb out unless ENOBUFS? */
                 err = sock->ops->sendmsg(sock, &msg, len);
                 if (unlikely(err < 0)) {
@@ -472,7 +493,6 @@ static void handle_tx(struct vhost_net *net)
                         vhost_add_used_and_signal(&net->dev, vq, head, 0);
                 else
                         vhost_zerocopy_signal_used(net, vq);
-                total_len += len;
                 vhost_net_tx_packet(net);
                 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
                         vhost_poll_queue(&vq->poll);
@@ -533,7 +553,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
                 while (vhost_can_busy_poll(&net->dev, endtime) &&
                        !sk_has_rx_data(sk) &&
                        vhost_vq_avail_empty(&net->dev, vq))
-                        cpu_relax_lowlatency();
+                        cpu_relax();
 
                 preempt_enable();
 
@@ -943,7 +963,7 @@ static struct socket *get_tap_socket(int fd)
         sock = tun_get_socket(file);
         if (!IS_ERR(sock))
                 return sock;
-        sock = macvtap_get_socket(file);
+        sock = tap_get_socket(file);
         if (IS_ERR(sock))
                 fput(file);
         return sock;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 6e29d053843d..fd6c8b66f06f 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -843,7 +843,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
         struct iov_iter out_iter, in_iter, prot_iter, data_iter;
         u64 tag;
         u32 exp_data_len, data_direction;
-        unsigned out, in;
+        unsigned int out = 0, in = 0;
         int head, ret, prot_bytes;
         size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
         size_t out_size, in_size;
@@ -922,8 +922,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                  */
                 iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
 
-                ret = copy_from_iter(req, req_size, &out_iter);
-                if (unlikely(ret != req_size)) {
+                if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
                         vq_err(vq, "Faulted on copy_from_iter\n");
                         vhost_scsi_send_bad_target(vs, vq, head, out);
                         continue;
@@ -1749,7 +1748,6 @@ out:
 static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                                  const char *name)
 {
-        struct se_portal_group *se_tpg;
         struct vhost_scsi_nexus *tv_nexus;
 
         mutex_lock(&tpg->tv_tpg_mutex);
@@ -1758,7 +1756,6 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                 pr_debug("tpg->tpg_nexus already exists\n");
                 return -EEXIST;
         }
-        se_tpg = &tpg->se_tpg;
 
         tv_nexus = kzalloc(sizeof(struct vhost_scsi_nexus), GFP_KERNEL);
         if (!tv_nexus) {
@@ -2090,7 +2087,7 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
         NULL,
 };
 
-static struct target_core_fabric_ops vhost_scsi_ops = {
+static const struct target_core_fabric_ops vhost_scsi_ops = {
         .module                 = THIS_MODULE,
         .name                   = "vhost",
         .get_fabric_name        = vhost_scsi_get_fabric_name,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89c0e97..f0ba362d4c10 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -27,6 +27,8 @@
 #include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
 
 #include "vhost.h"
@@ -49,7 +51,7 @@ enum {
 
 INTERVAL_TREE_DEFINE(struct vhost_umem_node, rb, __u64, __subtree_last,
-                     START, LAST, , vhost_umem_interval_tree);
+                     START, LAST, static inline, vhost_umem_interval_tree);
 
 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
 static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
@@ -130,14 +132,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
 
 static void vhost_init_is_le(struct vhost_virtqueue *vq)
 {
-        if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
-                vq->is_le = true;
+        vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
+                || virtio_legacy_is_little_endian();
 }
 #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
 
 static void vhost_reset_is_le(struct vhost_virtqueue *vq)
 {
-        vq->is_le = virtio_legacy_is_little_endian();
+        vhost_init_is_le(vq);
 }
 
 struct vhost_flush_struct {
@@ -261,8 +263,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
         if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
                 /* We can only add the work to the list after we're
                  * sure it was not in the list.
+                 * test_and_set_bit() implies a memory barrier.
                  */
-                smp_mb();
                 llist_add(&work->node, &dev->work_list);
                 wake_up_process(dev->worker);
         }
@@ -282,6 +284,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
+static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
+{
+        int j;
+
+        for (j = 0; j < VHOST_NUM_ADDRS; j++)
+                vq->meta_iotlb[j] = NULL;
+}
+
+static void vhost_vq_meta_reset(struct vhost_dev *d)
+{
+        int i;
+
+        for (i = 0; i < d->nvqs; ++i)
+                __vhost_vq_meta_reset(d->vqs[i]);
+}
+
 static void vhost_vq_reset(struct vhost_dev *dev,
                            struct vhost_virtqueue *vq)
 {
@@ -290,6 +308,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
         vq->avail = NULL;
         vq->used = NULL;
         vq->last_avail_idx = 0;
+        vq->last_used_event = 0;
         vq->avail_idx = 0;
         vq->last_used_idx = 0;
         vq->signalled_used = 0;
@@ -311,6 +330,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
         vq->busyloop_timeout = 0;
         vq->umem = NULL;
         vq->iotlb = NULL;
+        __vhost_vq_meta_reset(vq);
 }
 
 static int vhost_worker(void *data)
@@ -690,6 +710,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
         return 1;
 }
 
+static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
+                                               u64 addr, unsigned int size,
+                                               int type)
+{
+        const struct vhost_umem_node *node = vq->meta_iotlb[type];
+
+        if (!node)
+                return NULL;
+
+        return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
+}
+
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
 static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -719,7 +751,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
 static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
                           struct iovec iov[], int iov_size, int access);
 
-static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
+static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
                               const void *from, unsigned size)
 {
         int ret;
@@ -732,8 +764,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
                  * could be access through iotlb. So -EAGAIN should
                  * not happen in this case.
                  */
-                /* TODO: more fast path */
                 struct iov_iter t;
+                void __user *uaddr = vhost_vq_meta_fetch(vq,
+                                     (u64)(uintptr_t)to, size,
+                                     VHOST_ADDR_DESC);
+
+                if (uaddr)
+                        return __copy_to_user(uaddr, from, size);
+
                 ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
                                      ARRAY_SIZE(vq->iotlb_iov),
                                      VHOST_ACCESS_WO);
@@ -749,7 +787,7 @@ out:
 }
 
 static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
-                                void *from, unsigned size)
+                                void __user *from, unsigned size)
 {
         int ret;
 
@@ -761,8 +799,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
                  * could be access through iotlb. So -EAGAIN should
                  * not happen in this case.
                  */
-                /* TODO: more fast path */
+                void __user *uaddr = vhost_vq_meta_fetch(vq,
+                                     (u64)(uintptr_t)from, size,
+                                     VHOST_ADDR_DESC);
                 struct iov_iter f;
+
+                if (uaddr)
+                        return __copy_from_user(to, uaddr, size);
+
                 ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
                                      ARRAY_SIZE(vq->iotlb_iov),
                                      VHOST_ACCESS_RO);
@@ -782,17 +826,12 @@ out:
         return ret;
 }
 
-static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
-                                     void *addr, unsigned size)
+static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
+                                          void __user *addr, unsigned int size,
+                                          int type)
 {
         int ret;
 
-        /* This function should be called after iotlb
-         * prefetch, which means we're sure that vq
-         * could be access through iotlb. So -EAGAIN should
-         * not happen in this case.
-         */
-        /* TODO: more fast path */
         ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
                              ARRAY_SIZE(vq->iotlb_iov),
                              VHOST_ACCESS_RO);
@@ -813,14 +852,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
         return vq->iotlb_iov[0].iov_base;
 }
 
-#define vhost_put_user(vq, x, ptr) \
+/* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+                                            void *addr, unsigned int size,
+                                            int type)
+{
+        void __user *uaddr = vhost_vq_meta_fetch(vq,
+                             (u64)(uintptr_t)addr, size, type);
+        if (uaddr)
+                return uaddr;
+
+        return __vhost_get_user_slow(vq, addr, size, type);
+}
+
+#define vhost_put_user(vq, x, ptr) \
 ({ \
         int ret = -EFAULT; \
         if (!vq->iotlb) { \
                 ret = __put_user(x, ptr); \
         } else { \
                 __typeof__(ptr) to = \
-                        (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+                        (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+                                          sizeof(*ptr), VHOST_ADDR_USED); \
                 if (to != NULL) \
                         ret = __put_user(x, to); \
                 else \
@@ -829,14 +886,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
         ret; \
 })
 
-#define vhost_get_user(vq, x, ptr) \
+#define vhost_get_user(vq, x, ptr, type) \
 ({ \
         int ret; \
         if (!vq->iotlb) { \
                 ret = __get_user(x, ptr); \
         } else { \
                 __typeof__(ptr) from = \
-                        (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+                        (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+                                                           sizeof(*ptr), \
+                                                           type); \
                 if (from != NULL) \
                         ret = __get_user(x, from); \
                 else \
@@ -845,6 +904,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
         ret; \
 })
 
+#define vhost_get_avail(vq, x, ptr) \
+        vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used(vq, x, ptr) \
+        vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
+
 static void vhost_dev_lock_vqs(struct vhost_dev *d)
 {
         int i = 0;
@@ -934,8 +999,8 @@ static int umem_access_ok(u64 uaddr, u64 size, int access)
         return 0;
 }
 
-int vhost_process_iotlb_msg(struct vhost_dev *dev,
-                            struct vhost_iotlb_msg *msg)
+static int vhost_process_iotlb_msg(struct vhost_dev *dev,
+                                   struct vhost_iotlb_msg *msg)
 {
         int ret = 0;
 
@@ -950,6 +1015,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
                         ret = -EFAULT;
                         break;
                 }
+                vhost_vq_meta_reset(dev);
                 if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
                                          msg->iova + msg->size - 1,
                                          msg->uaddr, msg->perm)) {
@@ -959,6 +1025,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
                 vhost_iotlb_notify_vq(dev, msg);
                 break;
         case VHOST_IOTLB_INVALIDATE:
+                vhost_vq_meta_reset(dev);
                 vhost_del_umem_range(dev->iotlb, msg->iova,
                                      msg->iova + msg->size - 1);
                 break;
@@ -1102,12 +1169,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
                         sizeof *used + num * sizeof *used->ring + s);
 }
 
+static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
+                                 const struct vhost_umem_node *node,
+                                 int type)
+{
+        int access = (type == VHOST_ADDR_USED) ?
+                     VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+
+        if (likely(node->perm & access))
+                vq->meta_iotlb[type] = node;
+}
+
 static int iotlb_access_ok(struct vhost_virtqueue *vq,
-                           int access, u64 addr, u64 len)
+                           int access, u64 addr, u64 len, int type)
 {
         const struct vhost_umem_node *node;
         struct vhost_umem *umem = vq->iotlb;
-        u64 s = 0, size;
+        u64 s = 0, size, orig_addr = addr;
+
+        if (vhost_vq_meta_fetch(vq, addr, len, type))
+                return true;
 
         while (len > s) {
                 node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1124,6 +1205,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
                 }
 
                 size = node->size - addr + node->start;
+
+                if (orig_addr == addr && size >= len)
+                        vhost_vq_meta_update(vq, node, type);
+
                 s += size;
                 addr += size;
         }
@@ -1140,13 +1225,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
                 return 1;
 
         return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
-                               num * sizeof *vq->desc) &&
+                               num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
                iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
                                sizeof *vq->avail +
-                               num * sizeof *vq->avail->ring + s) &&
+                               num * sizeof(*vq->avail->ring) + s,
+                               VHOST_ADDR_AVAIL) &&
                iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
                                sizeof *vq->used +
-                               num * sizeof *vq->used->ring + s);
+                               num * sizeof(*vq->used->ring) + s,
+                               VHOST_ADDR_USED);
 }
 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
@@ -1324,7 +1411,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
                         r = -EINVAL;
                         break;
                 }
-                vq->last_avail_idx = s.num;
+                vq->last_avail_idx = vq->last_used_event = s.num;
                 /* Forget the cached index value. */
                 vq->avail_idx = vq->last_avail_idx;
                 break;
@@ -1713,10 +1800,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
         int r;
         bool is_le = vq->is_le;
 
-        if (!vq->private_data) {
-                vhost_reset_is_le(vq);
+        if (!vq->private_data)
                 return 0;
-        }
 
         vhost_init_is_le(vq);
 
@@ -1729,7 +1814,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
                 r = -EFAULT;
                 goto err;
         }
-        r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+        r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
         if (r) {
                 vq_err(vq, "Can't access used idx at %p\n",
                        &vq->used->idx);
@@ -1862,8 +1947,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
                                i, count);
                         return -EINVAL;
                 }
-                if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) !=
-                             sizeof(desc))) {
+                if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
                         vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
                                i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
                         return -EINVAL;
@@ -1932,29 +2016,36 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
         /* Check it isn't doing very strange things with descriptor numbers. */
         last_avail_idx = vq->last_avail_idx;
-        if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
-                vq_err(vq, "Failed to access avail idx at %p\n",
-                       &vq->avail->idx);
-                return -EFAULT;
-        }
-        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
-        if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
-                vq_err(vq, "Guest moved used index from %u to %u",
-                       last_avail_idx, vq->avail_idx);
-                return -EFAULT;
-        }
+        if (vq->avail_idx == vq->last_avail_idx) {
+                if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
+                        vq_err(vq, "Failed to access avail idx at %p\n",
+                                &vq->avail->idx);
+                        return -EFAULT;
+                }
+                vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
-        /* If there's nothing new since last we looked, return invalid. */
-        if (vq->avail_idx == last_avail_idx)
-                return vq->num;
+                if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
+                        vq_err(vq, "Guest moved used index from %u to %u",
+                                last_avail_idx, vq->avail_idx);
+                        return -EFAULT;
+                }
 
-        /* Only get avail ring entries after they have been exposed by guest. */
-        smp_rmb();
+                /* If there's nothing new since last we looked, return
+                 * invalid.
+                 */
+                if (vq->avail_idx == last_avail_idx)
+                        return vq->num;
+
+                /* Only get avail ring entries after they have been
+                 * exposed by guest.
+                 */
+                smp_rmb();
+        }
 
         /* Grab the next descriptor number they're advertising, and increment
          * the index we've seen. */
-        if (unlikely(vhost_get_user(vq, ring_head,
+        if (unlikely(vhost_get_avail(vq, ring_head,
                      &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
                 vq_err(vq, "Failed to read head: idx %d address %p\n",
                        last_avail_idx,
@@ -2159,10 +2250,6 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
         __u16 old, new;
         __virtio16 event;
         bool v;
-        /* Flush out used index updates. This is paired
-         * with the barrier that the Guest executes when enabling
-         * interrupts. */
-        smp_mb();
 
         if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
             unlikely(vq->avail_idx == vq->last_avail_idx))
@@ -2170,7 +2257,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 
         if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
                 __virtio16 flags;
-                if (vhost_get_user(vq, flags, &vq->avail->flags)) {
+                /* Flush out used index updates. This is paired
+                 * with the barrier that the Guest executes when enabling
+                 * interrupts. */
+                smp_mb();
+                if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
                         vq_err(vq, "Failed to get flags");
                         return true;
                 }
@@ -2184,11 +2275,26 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
         if (unlikely(!v))
                 return true;
 
-        if (vhost_get_user(vq, event, vhost_used_event(vq))) {
+        /* We're sure if the following conditions are met, there's no
+         * need to notify guest:
+         * 1) cached used event is ahead of new
+         * 2) old to new updating does not cross cached used event. */
+        if (vring_need_event(vq->last_used_event, new + vq->num, new) &&
+            !vring_need_event(vq->last_used_event, new, old))
+                return false;
+
+        /* Flush out used index updates. This is paired
+         * with the barrier that the Guest executes when enabling
+         * interrupts. */
+        smp_mb();
+
+        if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
                 vq_err(vq, "Failed to get used event idx");
                 return true;
         }
-        return vring_need_event(vhost16_to_cpu(vq, event), new, old);
+        vq->last_used_event = vhost16_to_cpu(vq, event);
+
+        return vring_need_event(vq->last_used_event, new, old);
 }
 
 /* This actually signals the guest, using eventfd. */
@@ -2226,11 +2332,15 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
         __virtio16 avail_idx;
         int r;
 
-        r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
-        if (r)
+        if (vq->avail_idx != vq->last_avail_idx)
+                return false;
+
+        r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
+        if (unlikely(r))
                 return false;
+        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
-        return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx;
+        return vq->avail_idx == vq->last_avail_idx;
 }
 EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
 
@@ -2261,7 +2371,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
         /* They could have slipped one in as we were doing that: make
          * sure it's written, then check again. */
         smp_mb();
-        r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+        r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
         if (r) {
                 vq_err(vq, "Failed to check avail idx at %p: %d\n",
                        &vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 78f3c5fc02e4..f55671d53f28 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,6 +76,13 @@ struct vhost_umem {
         int numem;
 };
 
+enum vhost_uaddr_type {
+        VHOST_ADDR_DESC = 0,
+        VHOST_ADDR_AVAIL = 1,
+        VHOST_ADDR_USED = 2,
+        VHOST_NUM_ADDRS = 3,
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
         struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
         struct vring_desc __user *desc;
         struct vring_avail __user *avail;
         struct vring_used __user *used;
+        const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
         struct file *kick;
         struct file *call;
         struct file *error;
@@ -107,6 +115,9 @@ struct vhost_virtqueue {
         /* Last index we used. */
         u16 last_used_idx;
 
+        /* Last used event we've seen */
+        u16 last_used_event;
+
         /* Used flags */
         u16 used_flags;
 
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 3bb02c60a2f5..bb8971f2a634 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -3,6 +3,7 @@
  *
  * Since these may be in userspace, we use (inline) accessors.
  */
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/vringh.h>
 #include <linux/virtio_ring.h>
@@ -820,13 +821,13 @@ EXPORT_SYMBOL(vringh_need_notify_user);
 static inline int getu16_kern(const struct vringh *vrh,
                               u16 *val, const __virtio16 *p)
 {
-        *val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
+        *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
         return 0;
 }
 
 static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-        ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
+        WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
         return 0;
 }
 
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a504e2e003da..ce5e63d2c66a 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -50,11 +50,10 @@ static u32 vhost_transport_get_local_cid(void)
         return VHOST_VSOCK_DEFAULT_HOST_CID;
 }
 
-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
 {
         struct vhost_vsock *vsock;
 
-        spin_lock_bh(&vhost_vsock_lock);
         list_for_each_entry(vsock, &vhost_vsock_list, list) {
                 u32 other_cid = vsock->guest_cid;
 
@@ -63,15 +62,24 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
                         continue;
 
                 if (other_cid == guest_cid) {
-                        spin_unlock_bh(&vhost_vsock_lock);
                         return vsock;
                 }
         }
-        spin_unlock_bh(&vhost_vsock_lock);
 
         return NULL;
 }
 
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+        struct vhost_vsock *vsock;
+
+        spin_lock_bh(&vhost_vsock_lock);
+        vsock = __vhost_vsock_get(guest_cid);
+        spin_unlock_bh(&vhost_vsock_lock);
+
+        return vsock;
+}
+
 static void
 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
                             struct vhost_virtqueue *vq)
@@ -195,7 +203,6 @@ static int
 vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 {
         struct vhost_vsock *vsock;
-        struct vhost_virtqueue *vq;
         int len = pkt->len;
 
         /* Find the vhost_vsock according to guest context id */
@@ -205,8 +212,6 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
                 return -ENODEV;
         }
 
-        vq = &vsock->vqs[VSOCK_VQ_RX];
-
         if (pkt->reply)
                 atomic_inc(&vsock->queued_replies);
 
@@ -368,6 +373,7 @@ static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
 
 static int vhost_vsock_start(struct vhost_vsock *vsock)
 {
+        struct vhost_virtqueue *vq;
         size_t i;
         int ret;
 
@@ -378,19 +384,20 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
                 goto err;
 
         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
-                struct vhost_virtqueue *vq = &vsock->vqs[i];
+                vq = &vsock->vqs[i];
 
                 mutex_lock(&vq->mutex);
 
                 if (!vhost_vq_access_ok(vq)) {
                         ret = -EFAULT;
-                        mutex_unlock(&vq->mutex);
                         goto err_vq;
                 }
 
                 if (!vq->private_data) {
                         vq->private_data = vsock;
-                        vhost_vq_init_access(vq);
+                        ret = vhost_vq_init_access(vq);
+                        if (ret)
+                                goto err_vq;
                 }
 
                 mutex_unlock(&vq->mutex);
@@ -400,8 +407,11 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
         return 0;
 
 err_vq:
+        vq->private_data = NULL;
+        mutex_unlock(&vq->mutex);
+
         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
-                struct vhost_virtqueue *vq = &vsock->vqs[i];
+                vq = &vsock->vqs[i];
 
                 mutex_lock(&vq->mutex);
                 vq->private_data = NULL;
@@ -562,11 +572,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
                 return -EINVAL;
 
         /* Refuse if CID is already in use */
-        other = vhost_vsock_get(guest_cid);
-        if (other && other != vsock)
-                return -EADDRINUSE;
-
         spin_lock_bh(&vhost_vsock_lock);
+        other = __vhost_vsock_get(guest_cid);
+        if (other && other != vsock) {
+                spin_unlock_bh(&vhost_vsock_lock);
+                return -EADDRINUSE;
+        }
         vsock->guest_cid = guest_cid;
         spin_unlock_bh(&vhost_vsock_lock);
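A usage note on the handle_tx() change in net.c above: while the avail ring still has buffers and the zerocopy pend limit is not hit, sendmsg() is now called with MSG_MORE so the underlying tun/tap socket may coalesce packets; the flag is cleared on the last packet of a batch so everything is flushed. A minimal userspace analogy of that flag discipline, with an illustrative send_chunk() helper that is not part of the kernel code:

#include <sys/socket.h>
#include <sys/uio.h>

/* Send one chunk; keep MSG_MORE set while the caller already has more
 * data queued so the lower layer may batch, and clear it on the final
 * chunk so the queued data is flushed. */
static ssize_t send_chunk(int fd, const void *buf, size_t len, int more_queued)
{
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };

        return sendmsg(fd, &msg, more_queued ? MSG_MORE : 0);
}

In the kernel hunk the "more queued" test is total_len < VHOST_NET_WEIGHT && !vhost_vq_avail_empty() && !vhost_exceeds_maxpend(), so batching is also bounded by the per-run weight that keeps one virtqueue from monopolizing the worker thread.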