diff options
author | Linus Torvalds | 2020-04-07 12:36:09 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-04-07 12:36:09 -0700 |
commit | 762a9f2f0144246872d61bce60085f62992f4ca0 (patch) | |
tree | 35564810cd1136afe0b5bd1472b59144accd00a9 | |
parent | d5d247661e869b71e4db5ca69b08b9607895d496 (diff) | |
parent | 4a7c46247f9c620c0390a15cb00b6ef9576b9c23 (diff) |
Merge tag 'for-linus-5.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:
- New mode for time travel, external via virtio
- Fixes for ubd to make sure no requests can get lost
- Fixes for vector networking
- Allow CONFIG_STATIC_LINK only when possible
- Minor cleanups and fixes
* tag 'for-linus-5.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
um: Remove some unnecessary NULL checks in vector_user.c
um: vector: Avoid NULL ptr deference if transport is unset
um: Make CONFIG_STATIC_LINK actually static
um: Implement cpu_relax() as ndelay(1) for time-travel
um: Implement ndelay/udelay in time-travel mode
um: Implement time-travel=ext
um: virtio: Implement VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS
um: time-travel: Rewrite as an event scheduler
um: Move timer-internal.h to non-shared
hostfs: Use kasprintf() instead of fixed buffer formatting
um: falloc.h needs to be directly included for older libc
um: ubd: Retry buffer read on any kind of error
um: ubd: Prevent buffer overrun on command completion
um: Fix overlapping ELF segments when statically linked
um: Delete never executed timer
um: Don't overwrite ethtool driver version
um: Fix len of file in create_pid_file
um: Don't use console_drivers directly
um: Cleanup CONFIG_IOSCHED_CFQ
26 files changed, 986 insertions, 214 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0917f8443c28..96ab7026b037 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -62,9 +62,12 @@ config NR_CPUS source "arch/$(HEADER_ARCH)/um/Kconfig" +config FORBID_STATIC_LINK + bool + config STATIC_LINK bool "Force a static link" - default n + depends on !FORBID_STATIC_LINK help This option gives you the ability to force a static link of UML. Normally, UML is linked as a shared binary. This is inconvenient for @@ -73,6 +76,9 @@ config STATIC_LINK Additionally, this option enables using higher memory spaces (up to 2.75G) for UML. + NOTE: This option is incompatible with some networking features which + depend on features that require being dynamically loaded (like NSS). + config LD_SCRIPT_STATIC bool default y @@ -191,6 +197,7 @@ config UML_TIME_TRAVEL_SUPPORT prompt "Support time-travel mode (e.g. for test execution)" # inf-cpu mode is incompatible with the benchmarking depends on !RAID6_PQ_BENCHMARK + depends on !SMP help Enable this option to support time travel inside the UML instance. diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 73e98bb57bf5..fb51bd206dbe 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -26,7 +26,7 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_IOSCHED_CFQ=m +CONFIG_IOSCHED_BFQ=m CONFIG_SSL=y CONFIG_NULL_CHAN=y CONFIG_PORT_CHAN=y diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 3281d7600225..477b87317424 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -24,7 +24,7 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_IOSCHED_CFQ=m +CONFIG_IOSCHED_BFQ=m CONFIG_SSL=y CONFIG_NULL_CHAN=y CONFIG_PORT_CHAN=y diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 72d417055782..9160ead56e33 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -234,6 +234,7 @@ config UML_NET_DAEMON config UML_NET_VECTOR bool "Vector I/O high performance network devices" depends on UML_NET + select FORBID_STATIC_LINK help This User-Mode Linux network driver uses multi-message send and receive functions. The host running the UML guest must have @@ -245,6 +246,7 @@ config UML_NET_VECTOR config UML_NET_VDE bool "VDE transport (obsolete)" depends on UML_NET + select FORBID_STATIC_LINK help This User-Mode Linux network transport allows one or more running UMLs on a single host to communicate with each other and also @@ -292,6 +294,7 @@ config UML_NET_MCAST config UML_NET_PCAP bool "pcap transport (obsolete)" depends on UML_NET + select FORBID_STATIC_LINK help The pcap transport makes a pcap packet stream on the host look like an ethernet device inside UML. This is useful for making diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 35ebeebfc1a8..1802cf4ef5a5 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -266,7 +266,6 @@ static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, "42", sizeof(info->version)); } static const struct ethtool_ops uml_net_ethtool_ops = { @@ -275,17 +274,6 @@ static const struct ethtool_ops uml_net_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -static void uml_net_user_timer_expire(struct timer_list *t) -{ -#ifdef undef - struct uml_net_private *lp = from_timer(lp, t, tl); - struct connection *conn = &lp->user; - - dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); - do_connect(conn); -#endif -} - void uml_net_setup_etheraddr(struct net_device *dev, char *str) { unsigned char *addr = dev->dev_addr; @@ -456,7 +444,6 @@ static void eth_configure(int n, void *init, char *mac, .add_address = transport->user->add_address, .delete_address = transport->user->delete_address }); - timer_setup(&lp->tl, uml_net_user_timer_expire, 0); spin_lock_init(&lp->lock); memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 247f95da057b..eae8c83364f7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1592,11 +1592,11 @@ int io_thread(void *arg) &io_remainder_size, UBD_REQ_BUFFER_SIZE ); - if (n < 0) { - if (n == -EAGAIN) { + if (n <= 0) { + if (n == -EAGAIN) ubd_read_poll(-1); - continue; - } + + continue; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { @@ -1607,7 +1607,9 @@ int io_thread(void *arg) written = 0; do { - res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); if (res >= 0) { written += res; } diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index e98304d0219e..8735c468230a 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -46,7 +46,6 @@ #define DRIVER_NAME "uml-vector" -#define DRIVER_VERSION "01" struct vector_cmd_line_arg { struct list_head list; int unit; @@ -198,6 +197,9 @@ static int get_transport_options(struct arglist *def) long parsed; int result = 0; + if (transport == NULL) + return -EINVAL; + if (vector != NULL) { if (kstrtoul(vector, 10, &parsed) == 0) { if (parsed == 0) { @@ -1378,7 +1380,6 @@ static void vector_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); } static int vector_net_load_bpf_flash(struct net_device *dev, diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index ddcd917be0af..aa28e9eecb7b 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -221,8 +221,7 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) return result; tap_cleanup: printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } @@ -266,8 +265,7 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec) return result; hybrid_cleanup: printk(UM_KERN_ERR "user_init_hybrid: init failed"); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } @@ -344,10 +342,8 @@ static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id) unix_cleanup: if (fd >= 0) os_close_file(fd); - if (remote_addr != NULL) - kfree(remote_addr); - if (result != NULL) - kfree(result); + kfree(remote_addr); + kfree(result); return NULL; } @@ -382,8 +378,7 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) return result; raw_cleanup: printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 45ff5ea22fea..6c71b6005177 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,9 +10,10 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 -#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 /* Vring state index masks */ #define VHOST_USER_VRING_INDEX_MASK 0xff #define VHOST_USER_VRING_POLL_MASK BIT(8) @@ -24,7 +25,8 @@ /* Supported protocol features */ #define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ - BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)) + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) enum vhost_user_request { VHOST_USER_GET_FEATURES = 1, @@ -52,12 +54,14 @@ enum vhost_user_request { VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, VHOST_USER_SET_CONFIG = 25, + VHOST_USER_VRING_KICK = 35, }; enum vhost_user_slave_request { VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, }; struct vhost_user_header { diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 023ced2250ea..be54d368e73d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -26,6 +26,7 @@ #include <linux/virtio.h> #include <linux/virtio_config.h> #include <linux/virtio_ring.h> +#include <linux/time-internal.h> #include <shared/as-layout.h> #include <irq_kern.h> #include <init.h> @@ -53,6 +54,7 @@ struct virtio_uml_device { struct virtio_device vdev; struct platform_device *pdev; + spinlock_t sock_lock; int sock, req_fd; u64 features; u64 protocol_features; @@ -63,6 +65,11 @@ struct virtio_uml_device { struct virtio_uml_vq_info { int kick_fd, call_fd; char name[32]; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + struct virtqueue *vq; + vq_callback_t *callback; + struct time_travel_event defer; +#endif }; extern unsigned long long physmem_size, highmem; @@ -117,10 +124,27 @@ static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) static int vhost_user_recv(struct virtio_uml_device *vu_dev, int fd, struct vhost_user_msg *msg, - size_t max_payload_size) + size_t max_payload_size, bool wait) { size_t size; - int rc = vhost_user_recv_header(fd, msg); + int rc; + + /* + * In virtio time-travel mode, we're handling all the vhost-user + * FDs by polling them whenever appropriate. However, we may get + * into a situation where we're sending out an interrupt message + * to a device (e.g. a net device) and need to handle a simulation + * time message while doing so, e.g. one that tells us to update + * our idea of how long we can run without scheduling. + * + * Thus, we need to not just read() from the given fd, but need + * to also handle messages for the simulation time - this function + * does that for us while waiting for the given fd to be readable. + */ + if (wait) + time_travel_wait_readable(fd); + + rc = vhost_user_recv_header(fd, msg); if (rc == -ECONNRESET && vu_dev->registered) { struct virtio_uml_platform_data *pdata; @@ -142,7 +166,8 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { - int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size); + int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, + max_payload_size, true); if (rc) return rc; @@ -172,7 +197,8 @@ static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { - int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, max_payload_size); + int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, + max_payload_size, false); if (rc) return rc; @@ -189,6 +215,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, int *fds, size_t num_fds) { size_t size = sizeof(msg->header) + msg->header.size; + unsigned long flags; bool request_ack; int rc; @@ -207,24 +234,28 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; + spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) - return rc; + goto out; if (request_ack) { uint64_t status; rc = vhost_user_recv_u64(vu_dev, &status); if (rc) - return rc; + goto out; if (status) { vu_err(vu_dev, "slave reports error: %llu\n", status); - return -EIO; + rc = -EIO; + goto out; } } - return 0; +out: + spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + return rc; } static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, @@ -324,6 +355,7 @@ static void vhost_user_reply(struct virtio_uml_device *vu_dev, static irqreturn_t vu_req_interrupt(int irq, void *data) { struct virtio_uml_device *vu_dev = data; + struct virtqueue *vq; int response = 1; struct { struct vhost_user_msg msg; @@ -343,6 +375,15 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) virtio_config_changed(&vu_dev->vdev); response = 0; break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vring_interrupt(0 /* ignored */, vq); + break; + } + } + break; case VHOST_USER_SLAVE_IOTLB_MSG: /* not supported - VIRTIO_F_IOMMU_PLATFORM */ case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: @@ -684,6 +725,17 @@ static bool vu_notify(struct virtqueue *vq) const uint64_t n = 1; int rc; + time_travel_propagate_time(); + + if (info->kick_fd < 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, + vq->index, 0) == 0; + } + do { rc = os_write_file(info->kick_fd, &n, sizeof(n)); } while (rc == -EINTR); @@ -749,10 +801,13 @@ static void vu_del_vq(struct virtqueue *vq) { struct virtio_uml_vq_info *info = vq->priv; - um_free_irq(VIRTIO_IRQ, vq); + if (info->call_fd >= 0) { + um_free_irq(VIRTIO_IRQ, vq); + os_close_file(info->call_fd); + } - os_close_file(info->call_fd); - os_close_file(info->kick_fd); + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); vring_del_virtqueue(vq); kfree(info); @@ -782,6 +837,15 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, int call_fds[2]; int rc; + /* no call FD needed/desired in this case */ + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + info->call_fd = -1; + return 0; + } + /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ rc = os_pipe(call_fds, true, true); if (rc < 0) @@ -810,6 +874,23 @@ out: return rc; } +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static void vu_defer_irq_handle(struct time_travel_event *d) +{ + struct virtio_uml_vq_info *info; + + info = container_of(d, struct virtio_uml_vq_info, defer); + info->callback(info->vq); +} + +static void vu_defer_irq_callback(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + + time_travel_add_irq_event(&info->defer); +} +#endif + static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, unsigned index, vq_callback_t *callback, const char *name, bool ctx) @@ -829,6 +910,19 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, pdev->id, name); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + /* + * When we get an interrupt, we must bounce it through the simulation + * calendar (the simtime device), except for the simtime device itself + * since that's part of the simulation control. + */ + if (time_travel_mode == TT_MODE_EXTERNAL && callback) { + info->callback = callback; + callback = vu_defer_irq_callback; + time_travel_set_event_fn(&info->defer, vu_defer_irq_handle); + } +#endif + vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, ctx, vu_notify, callback, info->name); if (!vq) { @@ -837,11 +931,19 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, } vq->priv = info; num = virtqueue_get_vring_size(vq); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + info->vq = vq; +#endif - rc = os_eventfd(0, 0); - if (rc < 0) - goto error_kick; - info->kick_fd = rc; + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { + info->kick_fd = -1; + } else { + rc = os_eventfd(0, 0); + if (rc < 0) + goto error_kick; + info->kick_fd = rc; + } rc = vu_setup_vq_call_fd(vu_dev, vq); if (rc) @@ -866,10 +968,13 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, return vq; error_setup: - um_free_irq(VIRTIO_IRQ, vq); - os_close_file(info->call_fd); + if (info->call_fd >= 0) { + um_free_irq(VIRTIO_IRQ, vq); + os_close_file(info->call_fd); + } error_call: - os_close_file(info->kick_fd); + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); error_kick: vring_del_virtqueue(vq); error_create: @@ -908,10 +1013,12 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, list_for_each_entry(vq, &vdev->vqs, list) { struct virtio_uml_vq_info *info = vq->priv; - rc = vhost_user_set_vring_kick(vu_dev, vq->index, - info->kick_fd); - if (rc) - goto error_setup; + if (info->kick_fd >= 0) { + rc = vhost_user_set_vring_kick(vu_dev, vq->index, + info->kick_fd); + if (rc) + goto error_setup; + } rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); if (rc) @@ -1008,6 +1115,8 @@ static int virtio_uml_probe(struct platform_device *pdev) return rc; vu_dev->sock = rc; + spin_lock_init(&vu_dev->sock_lock); + rc = vhost_user_init(vu_dev); if (rc) goto error_init; diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index db7d9d4e30d8..8d435f8a6dec 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -3,7 +3,6 @@ generic-y += bpf_perf_event.h generic-y += bug.h generic-y += compat.h generic-y += current.h -generic-y += delay.h generic-y += device.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h new file mode 100644 index 000000000000..56fc2b8f2dd0 --- /dev/null +++ b/arch/um/include/asm/delay.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_DELAY_H +#define __UM_DELAY_H +#include <asm-generic/delay.h> +#include <linux/time-internal.h> + +static inline void um_ndelay(unsigned long nsecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(nsecs); + return; + } + ndelay(nsecs); +} +#undef ndelay +#define ndelay um_ndelay + +static inline void um_udelay(unsigned long usecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(1000 * usecs); + return; + } + udelay(usecs); +} +#undef udelay +#define udelay um_udelay +#endif /* __UM_DELAY_H */ diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h new file mode 100644 index 000000000000..f3b03d39a854 --- /dev/null +++ b/arch/um/include/linux/time-internal.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __TIMER_INTERNAL_H__ +#define __TIMER_INTERNAL_H__ +#include <linux/list.h> + +#define TIMER_MULTIPLIER 256 +#define TIMER_MIN_DELTA 500 + +enum time_travel_mode { + TT_MODE_OFF, + TT_MODE_BASIC, + TT_MODE_INFCPU, + TT_MODE_EXTERNAL, +}; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +struct time_travel_event { + unsigned long long time; + void (*fn)(struct time_travel_event *d); + struct list_head list; + bool pending, onstack; +}; + +extern enum time_travel_mode time_travel_mode; + +void time_travel_sleep(unsigned long long duration); + +static inline void +time_travel_set_event_fn(struct time_travel_event *e, + void (*fn)(struct time_travel_event *d)) +{ + e->fn = fn; +} + +void __time_travel_propagate_time(void); + +static inline void time_travel_propagate_time(void) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_propagate_time(); +} + +void __time_travel_wait_readable(int fd); + +static inline void time_travel_wait_readable(int fd) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_wait_readable(fd); +} + +void time_travel_add_irq_event(struct time_travel_event *e); +#else +struct time_travel_event { +}; + +#define time_travel_mode TT_MODE_OFF + +static inline void time_travel_sleep(unsigned long long duration) +{ +} + +/* this is a macro so the event/function need not exist */ +#define time_travel_set_event_fn(e, fn) do {} while (0) + +static inline void time_travel_propagate_time(void) +{ +} + +static inline void time_travel_wait_readable(int fd) +{ +} +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ + +/* + * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, + * which is intentional since we really shouldn't link it in that case. + */ +void time_travel_ndelay(unsigned long nsec); +#endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 0f30204b6afa..f467d28fc0b4 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -181,6 +181,7 @@ extern int os_falloc_punch(int fd, unsigned long long offset, int count); extern int os_eventfd(unsigned int initval, int flags); extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, unsigned int fds_num); +int os_poll(unsigned int n, const int *fds); /* start_up.c */ extern void os_early_checks(void); diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h deleted file mode 100644 index 2d2d13c9b46f..000000000000 --- a/arch/um/include/shared/timer-internal.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 - 2014 Cisco Systems - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __TIMER_INTERNAL_H__ -#define __TIMER_INTERNAL_H__ - -#define TIMER_MULTIPLIER 256 -#define TIMER_MIN_DELTA 500 - -enum time_travel_mode { - TT_MODE_OFF, - TT_MODE_BASIC, - TT_MODE_INFCPU, -}; - -enum time_travel_timer_mode { - TT_TMR_DISABLED, - TT_TMR_ONESHOT, - TT_TMR_PERIODIC, -}; - -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -extern enum time_travel_mode time_travel_mode; -extern unsigned long long time_travel_time; -extern enum time_travel_timer_mode time_travel_timer_mode; -extern unsigned long long time_travel_timer_expiry; -extern unsigned long long time_travel_timer_interval; - -static inline void time_travel_set_time(unsigned long long ns) -{ - time_travel_time = ns; -} - -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) -{ - time_travel_timer_mode = mode; -} - -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ - time_travel_timer_expiry = expiry; -} - -static inline void time_travel_set_timer_interval(unsigned long long interval) -{ - time_travel_timer_interval = interval; -} -#else -#define time_travel_mode TT_MODE_OFF -#define time_travel_time 0 -#define time_travel_timer_expiry 0 -#define time_travel_timer_interval 0 - -static inline void time_travel_set_time(unsigned long long ns) -{ -} - -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) -{ -} - -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ -} - -static inline void time_travel_set_timer_interval(unsigned long long interval) -{ -} - -#define time_travel_timer_mode TT_TMR_DISABLED -#endif - -#endif diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 98bdf69e4c2e..e4abac6c9727 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -9,20 +9,19 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { static char line[1024]; - + struct console *con; size_t len = 0; - bool con_available = false; /* only dump kmsg when no console is available */ if (!console_trylock()) return; - if (console_drivers != NULL) - con_available = true; + for_each_console(con) + break; console_unlock(); - if (con_available == true) + if (con) return; printf("kmsg_dump:\n"); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 56a094182bf5..cbe33af2a880 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,7 +32,7 @@ #include <kern_util.h> #include <os.h> #include <skas.h> -#include <timer-internal.h> +#include <linux/time-internal.h> /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -203,43 +203,6 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } -static void time_travel_sleep(unsigned long long duration) -{ - unsigned long long next = time_travel_time + duration; - - if (time_travel_mode != TT_MODE_INFCPU) - os_timer_disable(); - - while (time_travel_timer_mode == TT_TMR_PERIODIC && - time_travel_timer_expiry < time_travel_time) - time_travel_set_timer_expiry(time_travel_timer_expiry + - time_travel_timer_interval); - - if (time_travel_timer_mode != TT_TMR_DISABLED && - time_travel_timer_expiry < next) { - if (time_travel_timer_mode == TT_TMR_ONESHOT) - time_travel_set_timer_mode(TT_TMR_DISABLED); - /* - * In basic mode, time_travel_time will be adjusted in - * the timer IRQ handler so it works even when the signal - * comes from the OS timer, see there. - */ - if (time_travel_mode != TT_MODE_BASIC) - time_travel_set_time(time_travel_timer_expiry); - - deliver_alarm(); - } else { - time_travel_set_time(next); - } - - if (time_travel_mode != TT_MODE_INFCPU) { - if (time_travel_timer_mode == TT_TMR_PERIODIC) - os_timer_set_interval(time_travel_timer_interval); - else if (time_travel_timer_mode == TT_TMR_ONESHOT) - os_timer_one_shot(time_travel_timer_expiry - next); - } -} - static void um_idle_sleep(void) { unsigned long long duration = UM_NSEC_PER_SEC; diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 40d90dddf3f1..0a12d5a09217 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -10,7 +10,7 @@ #include <sysdep/ptrace.h> #include <sysdep/ptrace_user.h> #include <sysdep/syscalls.h> -#include <shared/timer-internal.h> +#include <linux/time-internal.h> void handle_syscall(struct uml_pt_regs *r) { @@ -24,7 +24,8 @@ void handle_syscall(struct uml_pt_regs *r) * went to sleep, even if said userspace interacts with the kernel in * various ways. */ - if (time_travel_mode == TT_MODE_INFCPU) + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) schedule(); /* Initialize the syscall number and default return value. */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 94ea87bd231c..25eaa6a0c658 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2019 Intel Corporation */ #include <linux/clockchips.h> @@ -18,21 +19,484 @@ #include <asm/param.h> #include <kern_util.h> #include <os.h> -#include <timer-internal.h> +#include <linux/time-internal.h> +#include <linux/um_timetravel.h> #include <shared/init.h> #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT enum time_travel_mode time_travel_mode; -unsigned long long time_travel_time; -enum time_travel_timer_mode time_travel_timer_mode; -unsigned long long time_travel_timer_expiry; -unsigned long long time_travel_timer_interval; +EXPORT_SYMBOL_GPL(time_travel_mode); static bool time_travel_start_set; static unsigned long long time_travel_start; -#else +static unsigned long long time_travel_time; +static LIST_HEAD(time_travel_events); +static unsigned long long time_travel_timer_interval; +static unsigned long long time_travel_next_event; +static struct time_travel_event time_travel_timer_event; +static int time_travel_ext_fd = -1; +static unsigned int time_travel_ext_waiting; +static bool time_travel_ext_prev_request_valid; +static unsigned long long time_travel_ext_prev_request; +static bool time_travel_ext_free_until_valid; +static unsigned long long time_travel_ext_free_until; + +static void time_travel_set_time(unsigned long long ns) +{ + if (unlikely(ns < time_travel_time)) + panic("time-travel: time goes backwards %lld -> %lld\n", + time_travel_time, ns); + time_travel_time = ns; +} + +enum time_travel_message_handling { + TTMH_IDLE, + TTMH_POLL, + TTMH_READ, +}; + +static void time_travel_handle_message(struct um_timetravel_msg *msg, + enum time_travel_message_handling mode) +{ + struct um_timetravel_msg resp = { + .op = UM_TIMETRAVEL_ACK, + }; + int ret; + + /* + * Poll outside the locked section (if we're not called to only read + * the response) so we can get interrupts for e.g. virtio while we're + * here, but then we need to lock to not get interrupted between the + * read of the message and write of the ACK. + */ + if (mode != TTMH_READ) { + while (os_poll(1, &time_travel_ext_fd) != 0) { + if (mode == TTMH_IDLE) { + BUG_ON(!irqs_disabled()); + local_irq_enable(); + local_irq_disable(); + } + } + } + + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + + if (ret == 0) + panic("time-travel external link is broken\n"); + if (ret != sizeof(*msg)) + panic("invalid time-travel message - %d bytes\n", ret); + + switch (msg->op) { + default: + WARN_ONCE(1, "time-travel: unexpected message %lld\n", + (unsigned long long)msg->op); + break; + case UM_TIMETRAVEL_ACK: + return; + case UM_TIMETRAVEL_RUN: + time_travel_set_time(msg->time); + break; + case UM_TIMETRAVEL_FREE_UNTIL: + time_travel_ext_free_until_valid = true; + time_travel_ext_free_until = msg->time; + break; + } + + os_write_file(time_travel_ext_fd, &resp, sizeof(resp)); +} + +static u64 time_travel_ext_req(u32 op, u64 time) +{ + static int seq; + int mseq = ++seq; + struct um_timetravel_msg msg = { + .op = op, + .time = time, + .seq = mseq, + }; + unsigned long flags; + + /* + * We need to save interrupts here and only restore when we + * got the ACK - otherwise we can get interrupted and send + * another request while we're still waiting for an ACK, but + * the peer doesn't know we got interrupted and will send + * the ACKs in the same order as the message, but we'd need + * to see them in the opposite order ... + * + * This wouldn't matter *too* much, but some ACKs carry the + * current time (for UM_TIMETRAVEL_GET) and getting another + * ACK without a time would confuse us a lot! + * + * The sequence number assignment that happens here lets us + * debug such message handling issues more easily. + */ + local_irq_save(flags); + os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + + while (msg.op != UM_TIMETRAVEL_ACK) + time_travel_handle_message(&msg, TTMH_READ); + + if (msg.seq != mseq) + panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", + msg.op, msg.seq, mseq, msg.time); + + if (op == UM_TIMETRAVEL_GET) + time_travel_set_time(msg.time); + local_irq_restore(flags); + + return msg.time; +} + +void __time_travel_wait_readable(int fd) +{ + int fds[2] = { fd, time_travel_ext_fd }; + int ret; + + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + while ((ret = os_poll(2, fds))) { + struct um_timetravel_msg msg; + + if (ret == 1) + time_travel_handle_message(&msg, TTMH_READ); + } +} +EXPORT_SYMBOL_GPL(__time_travel_wait_readable); + +static void time_travel_ext_update_request(unsigned long long time) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + /* asked for exactly this time previously */ + if (time_travel_ext_prev_request_valid && + time == time_travel_ext_prev_request) + return; + + time_travel_ext_prev_request = time; + time_travel_ext_prev_request_valid = true; + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); +} + +void __time_travel_propagate_time(void) +{ + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); +} +EXPORT_SYMBOL_GPL(__time_travel_propagate_time); + +/* returns true if we must do a wait to the simtime device */ +static bool time_travel_ext_request(unsigned long long time) +{ + /* + * If we received an external sync point ("free until") then we + * don't have to request/wait for anything until then, unless + * we're already waiting. + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return false; + + time_travel_ext_update_request(time); + return true; +} + +static void time_travel_ext_wait(bool idle) +{ + struct um_timetravel_msg msg = { + .op = UM_TIMETRAVEL_ACK, + }; + + time_travel_ext_prev_request_valid = false; + time_travel_ext_waiting++; + + time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); + + /* + * Here we are deep in the idle loop, so we have to break out of the + * kernel abstraction in a sense and implement this in terms of the + * UML system waiting on the VQ interrupt while sleeping, when we get + * the signal it'll call time_travel_ext_vq_notify_done() completing the + * call. + */ + while (msg.op != UM_TIMETRAVEL_RUN) + time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL); + + time_travel_ext_waiting--; + + /* we might request more stuff while polling - reset when we run */ + time_travel_ext_prev_request_valid = false; +} + +static void time_travel_ext_get_time(void) +{ + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); +} + +static void __time_travel_update_time(unsigned long long ns, bool idle) +{ + if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns)) + time_travel_ext_wait(idle); + else + time_travel_set_time(ns); +} + +static struct time_travel_event *time_travel_first_event(void) +{ + return list_first_entry_or_null(&time_travel_events, + struct time_travel_event, + list); +} + +static void __time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + struct time_travel_event *tmp; + bool inserted = false; + + if (WARN(time_travel_mode == TT_MODE_BASIC && + e != &time_travel_timer_event, + "only timer events can be handled in basic mode")) + return; + + if (e->pending) + return; + + e->pending = true; + e->time = time; + + list_for_each_entry(tmp, &time_travel_events, list) { + /* + * Add the new entry before one with higher time, + * or if they're equal and both on stack, because + * in that case we need to unwind the stack in the + * right order, and the later event (timer sleep + * or such) must be dequeued first. + */ + if ((tmp->time > e->time) || + (tmp->time == e->time && tmp->onstack && e->onstack)) { + list_add_tail(&e->list, &tmp->list); + inserted = true; + break; + } + } + + if (!inserted) + list_add_tail(&e->list, &time_travel_events); + + tmp = time_travel_first_event(); + time_travel_ext_update_request(tmp->time); + time_travel_next_event = tmp->time; +} + +static void time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + if (WARN_ON(!e->fn)) + return; + + __time_travel_add_event(e, time); +} + +void time_travel_periodic_timer(struct time_travel_event *e) +{ + time_travel_add_event(&time_travel_timer_event, + time_travel_time + time_travel_timer_interval); + deliver_alarm(); +} + +static void time_travel_deliver_event(struct time_travel_event *e) +{ + if (e == &time_travel_timer_event) { + /* + * deliver_alarm() does the irq_enter/irq_exit + * by itself, so must handle it specially here + */ + e->fn(e); + } else { + unsigned long flags; + + local_irq_save(flags); + irq_enter(); + e->fn(e); + irq_exit(); + local_irq_restore(flags); + } +} + +static bool time_travel_del_event(struct time_travel_event *e) +{ + if (!e->pending) + return false; + list_del(&e->list); + e->pending = false; + return true; +} + +static void time_travel_update_time(unsigned long long next, bool idle) +{ + struct time_travel_event ne = { + .onstack = true, + }; + struct time_travel_event *e; + bool finished = idle; + + /* add it without a handler - we deal with that specifically below */ + __time_travel_add_event(&ne, next); + + do { + e = time_travel_first_event(); + + BUG_ON(!e); + __time_travel_update_time(e->time, idle); + + /* new events may have been inserted while we were waiting */ + if (e == time_travel_first_event()) { + BUG_ON(!time_travel_del_event(e)); + BUG_ON(time_travel_time != e->time); + + if (e == &ne) { + finished = true; + } else { + if (e->onstack) + panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n", + time_travel_time, e->time, e); + time_travel_deliver_event(e); + } + } + + e = time_travel_first_event(); + if (e) + time_travel_ext_update_request(e->time); + } while (ne.pending && !finished); + + time_travel_del_event(&ne); +} + +void time_travel_ndelay(unsigned long nsec) +{ + time_travel_update_time(time_travel_time + nsec, false); +} +EXPORT_SYMBOL(time_travel_ndelay); + +void time_travel_add_irq_event(struct time_travel_event *e) +{ + BUG_ON(time_travel_mode != TT_MODE_EXTERNAL); + + time_travel_ext_get_time(); + /* + * We could model interrupt latency here, for now just + * don't have any latency at all and request the exact + * same time (again) to run the interrupt... + */ + time_travel_add_event(e, time_travel_time); +} +EXPORT_SYMBOL_GPL(time_travel_add_irq_event); + +static void time_travel_oneshot_timer(struct time_travel_event *e) +{ + deliver_alarm(); +} + +void time_travel_sleep(unsigned long long duration) +{ + unsigned long long next = time_travel_time + duration; + + if (time_travel_mode == TT_MODE_BASIC) + os_timer_disable(); + + time_travel_update_time(next, true); + + if (time_travel_mode == TT_MODE_BASIC && + time_travel_timer_event.pending) { + if (time_travel_timer_event.fn == time_travel_periodic_timer) { + /* + * This is somewhat wrong - we should get the first + * one sooner like the os_timer_one_shot() below... + */ + os_timer_set_interval(time_travel_timer_interval); + } else { + os_timer_one_shot(time_travel_timer_event.time - next); + } + } +} + +static void time_travel_handle_real_alarm(void) +{ + time_travel_set_time(time_travel_next_event); + + time_travel_del_event(&time_travel_timer_event); + + if (time_travel_timer_event.fn == time_travel_periodic_timer) + time_travel_add_event(&time_travel_timer_event, + time_travel_time + + time_travel_timer_interval); +} + +static void time_travel_set_interval(unsigned long long interval) +{ + time_travel_timer_interval = interval; +} + +static int time_travel_connect_external(const char *socket) +{ + const char *sep; + unsigned long long id = (unsigned long long)-1; + int rc; + + if ((sep = strchr(socket, ':'))) { + char buf[25] = {}; + if (sep - socket > sizeof(buf) - 1) + goto invalid_number; + + memcpy(buf, socket, sep - socket); + if (kstrtoull(buf, 0, &id)) { +invalid_number: + panic("time-travel: invalid external ID in string '%s'\n", + socket); + return -EINVAL; + } + + socket = sep + 1; + } + + rc = os_connect_socket(socket); + if (rc < 0) { + panic("time-travel: failed to connect to external socket %s\n", + socket); + return rc; + } + + time_travel_ext_fd = rc; + + time_travel_ext_req(UM_TIMETRAVEL_START, id); + + return 1; +} +#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ #define time_travel_start_set 0 #define time_travel_start 0 +#define time_travel_time 0 + +static inline void time_travel_update_time(unsigned long long ns, bool retearly) +{ +} + +static inline void time_travel_handle_real_alarm(void) +{ +} + +static void time_travel_set_interval(unsigned long long interval) +{ +} + +/* fail link if this actually gets used */ +extern u64 time_travel_ext_req(u32 op, u64 time); + +/* these are empty macros so the struct/fn need not exist */ +#define time_travel_add_event(e, time) do { } while (0) +#define time_travel_del_event(e) do { } while (0) #endif void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) @@ -48,7 +512,7 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) * never get any real signals from the OS. */ if (time_travel_mode == TT_MODE_BASIC) - time_travel_set_time(time_travel_timer_expiry); + time_travel_handle_real_alarm(); local_irq_save(flags); do_IRQ(TIMER_IRQ, regs); @@ -58,9 +522,10 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { if (time_travel_mode != TT_MODE_OFF) - time_travel_set_timer_mode(TT_TMR_DISABLED); + time_travel_del_event(&time_travel_timer_event); - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) os_timer_disable(); return 0; @@ -71,12 +536,16 @@ static int itimer_set_periodic(struct clock_event_device *evt) unsigned long long interval = NSEC_PER_SEC / HZ; if (time_travel_mode != TT_MODE_OFF) { - time_travel_set_timer_mode(TT_TMR_PERIODIC); - time_travel_set_timer_expiry(time_travel_time + interval); - time_travel_set_timer_interval(interval); + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_periodic_timer); + time_travel_set_interval(interval); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + interval); } - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) os_timer_set_interval(interval); return 0; @@ -88,11 +557,15 @@ static int itimer_next_event(unsigned long delta, delta += 1; if (time_travel_mode != TT_MODE_OFF) { - time_travel_set_timer_mode(TT_TMR_ONESHOT); - time_travel_set_timer_expiry(time_travel_time + delta); + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_oneshot_timer); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + delta); } - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) return os_timer_one_shot(delta); return 0; @@ -143,8 +616,17 @@ static u64 timer_read(struct clocksource *cs) * stuck in loops that expect time to move more than the * exact requested sleep amount, e.g. python's socket server, * see https://bugs.python.org/issue37026. + * + * However, don't do that when we're in interrupt or such as + * then we might recurse into our own processing, and get to + * even more waiting, and that's not good - it messes up the + * "what do I do next" and onstack event we use to know when + * to return from time_travel_update_time(). */ - time_travel_set_time(time_travel_time + TIMER_MULTIPLIER); + if (!irqs_disabled() && !in_interrupt() && !in_softirq()) + time_travel_update_time(time_travel_time + + TIMER_MULTIPLIER, + false); return time_travel_time / TIMER_MULTIPLIER; } @@ -188,6 +670,8 @@ void read_persistent_clock64(struct timespec64 *ts) if (time_travel_start_set) nsecs = time_travel_start + time_travel_time; + else if (time_travel_mode == TT_MODE_EXTERNAL) + nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); else nsecs = os_persistent_clock_emulation(); @@ -204,7 +688,8 @@ void __init time_init(void) #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT unsigned long calibrate_delay_is_known(void) { - if (time_travel_mode == TT_MODE_INFCPU) + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) return 1; return 0; } @@ -218,6 +703,13 @@ int setup_time_travel(char *str) return 1; } + if (strncmp(str, "=ext:", 5) == 0) { + time_travel_mode = TT_MODE_EXTERNAL; + timer_clockevent.name = "time-travel-timer-external"; + timer_clocksource.name = "time-travel-clock-external"; + return time_travel_connect_external(str + 5); + } + if (!*str) { time_travel_mode = TT_MODE_BASIC; timer_clockevent.name = "time-travel-timer"; @@ -242,7 +734,15 @@ __uml_help(setup_time_travel, "are no wall clock timers, and any CPU processing happens - as seen from the\n" "guest - instantly. This can be useful for accurate simulation regardless of\n" "debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" -"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"); +"easily lead to getting stuck (e.g. if anything in the system busy loops).\n" +"\n" +"time-travel=ext:[ID:]/path/to/socket\n" +"This enables time travel mode similar to =inf-cpu, except the system will\n" +"use the given socket to coordinate with a central scheduler, in order to\n" +"have more than one system simultaneously be on simulated time. The virtio\n" +"driver code in UML knows about this so you can also simulate networks and\n" +"devices using it, assuming the device has the right capabilities.\n" +"The optional ID is a 64-bit integer that's sent to the central scheduler.\n"); int setup_time_travel_start(char *str) { diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 9f21443be2c9..3b6dab3d4501 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -19,10 +19,10 @@ SECTIONS __binary_start = START; . = START + SIZEOF_HEADERS; + . = ALIGN(PAGE_SIZE); _text = .; INIT_TEXT_SECTION(0) - . = ALIGN(PAGE_SIZE); .text : { diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index fbda10535dab..26ecbd64c409 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -5,9 +5,11 @@ #include <stdio.h> #include <unistd.h> +#include <stdlib.h> #include <errno.h> #include <fcntl.h> #include <signal.h> +#include <linux/falloc.h> #include <sys/ioctl.h> #include <sys/mount.h> #include <sys/socket.h> @@ -16,6 +18,7 @@ #include <sys/un.h> #include <sys/types.h> #include <sys/eventfd.h> +#include <poll.h> #include <os.h> static void copy_stat(struct uml_stat *dst, const struct stat64 *src) @@ -664,3 +667,31 @@ int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, return -errno; return err; } + +int os_poll(unsigned int n, const int *fds) +{ + /* currently need 2 FDs at most so avoid dynamic allocation */ + struct pollfd pollfds[2] = {}; + unsigned int i; + int ret; + + if (n > ARRAY_SIZE(pollfds)) + return -EINVAL; + + for (i = 0; i < n; i++) { + pollfds[i].fd = fds[i]; + pollfds[i].events = POLLIN; + } + + ret = poll(pollfds, n, -1); + if (ret < 0) + return -errno; + + /* Return the index of the available FD */ + for (i = 0; i < n; i++) { + if (pollfds[i].revents) + return i; + } + + return -EIO; +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 432f8e1f55c2..90f6de224c70 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -14,7 +14,6 @@ #include <kern_util.h> #include <os.h> #include <string.h> -#include <timer-internal.h> static timer_t event_high_res_timer = 0; diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 44def53a11cd..9e16078a4bf8 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -220,11 +220,12 @@ static void __init create_pid_file(void) char pid[sizeof("nnnnn\0")], *file; int fd, n; - file = malloc(strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")); + n = strlen(uml_dir) + UMID_LEN + sizeof("/pid\0"); + file = malloc(n); if (!file) return; - if (umid_file_name("pid", file, sizeof(file))) + if (umid_file_name("pid", file, n)) goto out; fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 593d5f3902bd..478710384b34 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __UM_PROCESSOR_H #define __UM_PROCESSOR_H +#include <linux/time-internal.h> /* include faultinfo structure */ #include <sysdep/faultinfo.h> @@ -21,12 +22,19 @@ #include <asm/user.h> /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) +static __always_inline void rep_nop(void) { __asm__ __volatile__("rep;nop": : :"memory"); } -#define cpu_relax() rep_nop() +static __always_inline void cpu_relax(void) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + time_travel_ndelay(1); + else + rep_nop(); +} #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6b8c49076bb..c070c0d8e3e9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -139,8 +139,8 @@ static char *inode_name(struct inode *ino) static char *follow_link(char *link) { - int len, n; char *name, *resolved, *end; + int n; name = __getname(); if (!name) { @@ -164,15 +164,13 @@ static char *follow_link(char *link) return name; *(end + 1) = '\0'; - len = strlen(link) + strlen(name) + 1; - resolved = kmalloc(len, GFP_KERNEL); + resolved = kasprintf(GFP_KERNEL, "%s%s", link, name); if (resolved == NULL) { n = -ENOMEM; goto out_free; } - sprintf(resolved, "%s%s", link, name); __putname(name); kfree(link); return resolved; @@ -921,18 +919,16 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; - /* NULL is printed as <NULL> by sprintf: avoid that. */ + /* NULL is printed as '(null)' by printf(): avoid that. */ if (req_root == NULL) req_root = ""; err = -ENOMEM; sb->s_fs_info = host_root_path = - kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL); + kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root); if (host_root_path == NULL) goto out; - sprintf(host_root_path, "%s/%s", root_ino, req_root); - root_inode = new_inode(sb); if (!root_inode) goto out; diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h new file mode 100644 index 000000000000..ca3238222b6d --- /dev/null +++ b/include/uapi/linux/um_timetravel.h @@ -0,0 +1,128 @@ +/* + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Copyright (C) 2019 Intel Corporation + */ +#ifndef _UAPI_LINUX_UM_TIMETRAVEL_H +#define _UAPI_LINUX_UM_TIMETRAVEL_H +#include <linux/types.h> + +/** + * struct um_timetravel_msg - UM time travel message + * + * This is the basic message type, going in both directions. + * + * This is the message passed between the host (user-mode Linux instance) + * and the calendar (the application on the other side of the socket) in + * order to implement common scheduling. + * + * Whenever UML has an event it will request runtime for it from the + * calendar, and then wait for its turn until it can run, etc. Note + * that it will only ever request the single next runtime, i.e. multiple + * REQUEST messages override each other. + */ +struct um_timetravel_msg { + /** + * @op: operation value from &enum um_timetravel_ops + */ + __u32 op; + + /** + * @seq: sequence number for the message - shall be reflected in + * the ACK response, and should be checked while processing + * the response to see if it matches + */ + __u32 seq; + + /** + * @time: time in nanoseconds + */ + __u64 time; +}; + +/** + * enum um_timetravel_ops - Operation codes + */ +enum um_timetravel_ops { + /** + * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, + * this usually doesn't carry any data in the 'time' field + * unless otherwise specified below + */ + UM_TIMETRAVEL_ACK = 0, + + /** + * @UM_TIMETRAVEL_START: initialize the connection, the time + * field contains an (arbitrary) ID to possibly be able + * to distinguish the connections. + */ + UM_TIMETRAVEL_START = 1, + + /** + * @UM_TIMETRAVEL_REQUEST: request to run at the given time + * (host -> calendar) + */ + UM_TIMETRAVEL_REQUEST = 2, + + /** + * @UM_TIMETRAVEL_WAIT: Indicate waiting for the previously requested + * runtime, new requests may be made while waiting (e.g. due to + * interrupts); the time field is ignored. The calendar must process + * this message and later send a %UM_TIMETRAVEL_RUN message when + * the host can run again. + * (host -> calendar) + */ + UM_TIMETRAVEL_WAIT = 3, + + /** + * @UM_TIMETRAVEL_GET: return the current time from the calendar in the + * ACK message, the time in the request message is ignored + * (host -> calendar) + */ + UM_TIMETRAVEL_GET = 4, + + /** + * @UM_TIMETRAVEL_UPDATE: time update to the calendar, must be sent e.g. + * before kicking an interrupt to another calendar + * (host -> calendar) + */ + UM_TIMETRAVEL_UPDATE = 5, + + /** + * @UM_TIMETRAVEL_RUN: run time request granted, current time is in + * the time field + * (calendar -> host) + */ + UM_TIMETRAVEL_RUN = 6, + + /** + * @UM_TIMETRAVEL_FREE_UNTIL: Enable free-running until the given time, + * this is a message from the calendar telling the host that it can + * freely do its own scheduling for anything before the indicated + * time. + * Note that if a calendar sends this message once, the host may + * assume that it will also do so in the future, if it implements + * wraparound semantics for the time field. + * (calendar -> host) + */ + UM_TIMETRAVEL_FREE_UNTIL = 7, + + /** + * @UM_TIMETRAVEL_GET_TOD: Return time of day, typically used once at + * boot by the virtual machines to get a synchronized time from + * the simulation. + */ + UM_TIMETRAVEL_GET_TOD = 8, +}; + +#endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ |