From b495dfed706c4c5873c0dab8930ad6eb1d276a6c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2020 20:22:26 +0100 Subject: um: Cleanup CONFIG_IOSCHED_CFQ CONFIG_IOSCHED_CFQ is gone since commit f382fb0bcef4 ("block: remove legacy IO schedulers"). The IOSCHED_BFQ seems to replace IOSCHED_CFQ so select it in configs previously choosing the latter. Signed-off-by: Krzysztof Kozlowski Acked-by: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/configs/i386_defconfig | 2 +- arch/um/configs/x86_64_defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 73e98bb57bf5..fb51bd206dbe 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -26,7 +26,7 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_IOSCHED_CFQ=m +CONFIG_IOSCHED_BFQ=m CONFIG_SSL=y CONFIG_NULL_CHAN=y CONFIG_PORT_CHAN=y diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 3281d7600225..477b87317424 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -24,7 +24,7 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_IOSCHED_CFQ=m +CONFIG_IOSCHED_BFQ=m CONFIG_SSL=y CONFIG_NULL_CHAN=y CONFIG_PORT_CHAN=y -- cgit v1.2.3 From 7d7c0568285d6f5630fb269766186afe09e58dc7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2020 12:02:59 +0200 Subject: um: Don't use console_drivers directly console_drivers is kind of (semi-)private variable to the console code. Direct use of it make us stuck with it being exported here and there. Reduce use of console_drivers by replacing it with for_each_console(). Cc: Thomas Meyer Signed-off-by: Andy Shevchenko Signed-off-by: Richard Weinberger --- arch/um/kernel/kmsg_dump.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 98bdf69e4c2e..e4abac6c9727 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -9,20 +9,19 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { static char line[1024]; - + struct console *con; size_t len = 0; - bool con_available = false; /* only dump kmsg when no console is available */ if (!console_trylock()) return; - if (console_drivers != NULL) - con_available = true; + for_each_console(con) + break; console_unlock(); - if (con_available == true) + if (con) return; printf("kmsg_dump:\n"); -- cgit v1.2.3 From ba758cfce00a5977ccf019926f8379a96f5ac5f6 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 19 Feb 2020 21:44:42 +0800 Subject: um: Fix len of file in create_pid_file sizeof gives us the size of the pointer variable, not of the area it points to. So the number of bytes copied by umid_file_name() is 8. We should pass in the correct length of the file buffer. Signed-off-by: Wen Yang Signed-off-by: Richard Weinberger --- arch/um/os-Linux/umid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 44def53a11cd..9e16078a4bf8 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -220,11 +220,12 @@ static void __init create_pid_file(void) char pid[sizeof("nnnnn\0")], *file; int fd, n; - file = malloc(strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")); + n = strlen(uml_dir) + UMID_LEN + sizeof("/pid\0"); + file = malloc(n); if (!file) return; - if (umid_file_name("pid", file, sizeof(file))) + if (umid_file_name("pid", file, n)) goto out; fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); -- cgit v1.2.3 From c2ed957c3b288cd037d2c933222edaf1e20ede26 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Feb 2020 10:40:13 +0200 Subject: um: Don't overwrite ethtool driver version In-tree drivers don't need to manage internal version because they are aligned to the global Linux kernel version, which is reported by default with "ethtool -i". Signed-off-by: Leon Romanovsky Acked-by: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/net_kern.c | 1 - arch/um/drivers/vector_kern.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 35ebeebfc1a8..af07733c2dc8 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -266,7 +266,6 @@ static void uml_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, "42", sizeof(info->version)); } static const struct ethtool_ops uml_net_ethtool_ops = { diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 0ff86391f77d..9693dfca7651 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -46,7 +46,6 @@ #define DRIVER_NAME "uml-vector" -#define DRIVER_VERSION "01" struct vector_cmd_line_arg { struct list_head list; int unit; @@ -1378,7 +1377,6 @@ static void vector_net_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); } static int vector_net_load_bpf_flash(struct net_device *dev, -- cgit v1.2.3 From 73343392aae8bdd939ca58e855889d1b015b0ef4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Feb 2020 10:40:14 +0200 Subject: um: Delete never executed timer The "#ifdef undef" construction effectively disabled the timer. It causes to the fact that this timer did nothing, so delete it. Signed-off-by: Leon Romanovsky Acked-by: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/net_kern.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index af07733c2dc8..1802cf4ef5a5 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -274,17 +274,6 @@ static const struct ethtool_ops uml_net_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -static void uml_net_user_timer_expire(struct timer_list *t) -{ -#ifdef undef - struct uml_net_private *lp = from_timer(lp, t, tl); - struct connection *conn = &lp->user; - - dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); - do_connect(conn); -#endif -} - void uml_net_setup_etheraddr(struct net_device *dev, char *str) { unsigned char *addr = dev->dev_addr; @@ -455,7 +444,6 @@ static void eth_configure(int n, void *init, char *mac, .add_address = transport->user->add_address, .delete_address = transport->user->delete_address }); - timer_setup(&lp->tl, uml_net_user_timer_expire, 0); spin_lock_init(&lp->lock); memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); -- cgit v1.2.3 From 598f5630361397c542a0ba2bec0ac5c0e1723d5c Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 5 Mar 2020 10:39:39 -0800 Subject: um: Fix overlapping ELF segments when statically linked When statically linked, the .text section in UML kernels is not page aligned, causing it to share a page with the executable headers. As .text and the executable headers have different permissions, this causes the kernel to wish to map the same page twice (once as headers with r-- permissions, once as .text with r-x permissions), causing a segfault, and a nasty message printed to the host kernel's dmesg: "Uhuuh, elf segment at 0000000060000000 requested but the memory is mapped already" By aligning the .text to a page boundary (as in the dynamically linked version in dyn.lds.S), there is no such overlap, and the kernel runs correctly. Signed-off-by: David Gow Reviewed-by: Brendan Higgins Signed-off-by: Richard Weinberger --- arch/um/kernel/uml.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 9f21443be2c9..3b6dab3d4501 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -19,10 +19,10 @@ SECTIONS __binary_start = START; . = START + SIZEOF_HEADERS; + . = ALIGN(PAGE_SIZE); _text = .; INIT_TEXT_SECTION(0) - . = ALIGN(PAGE_SIZE); .text : { -- cgit v1.2.3 From 6e682d53fc1ef73a169e2a5300326cb23abb32ee Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Mar 2020 20:45:06 -0400 Subject: um: ubd: Prevent buffer overrun on command completion On the hypervisor side, when completing commands and the pipe is full, we retry writing only the entries that failed, by offsetting io_req_buffer, but we don't reduce the number of bytes written, which can cause a buffer overrun of io_req_buffer, and write garbage to the pipe. Cc: Martyn Welch Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Richard Weinberger --- arch/um/drivers/ubd_kern.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 247f95da057b..eca45ad2166c 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1607,7 +1607,9 @@ int io_thread(void *arg) written = 0; do { - res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); if (res >= 0) { written += res; } -- cgit v1.2.3 From e355b2f55efc4633409ada14125c9e28ad2de012 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Mar 2020 20:45:07 -0400 Subject: um: ubd: Retry buffer read on any kind of error Should bulk_req_safe_read return an error, we want to retry the read, otherwise, even though no IO will be done, os_write_file might still end up writing garbage to the pipe. Cc: Martyn Welch Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Richard Weinberger --- arch/um/drivers/ubd_kern.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index eca45ad2166c..eae8c83364f7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1592,11 +1592,11 @@ int io_thread(void *arg) &io_remainder_size, UBD_REQ_BUFFER_SIZE ); - if (n < 0) { - if (n == -EAGAIN) { + if (n <= 0) { + if (n == -EAGAIN) ubd_read_poll(-1); - continue; - } + + continue; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { -- cgit v1.2.3 From 35f3401317a3b26aa01fde8facfd320f2628fdcc Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Tue, 17 Mar 2020 17:35:34 +0000 Subject: um: falloc.h needs to be directly included for older libc When building UML with glibc 2.17 installed, compilation of arch/um/os-Linux/file.c fails due to failure to find FALLOC_FL_PUNCH_HOLE and FALLOC_FL_KEEP_SIZE definitions. It appears that /usr/include/bits/fcntl-linux.h (indirectly included by /usr/include/fcntl.h) does not include falloc.h with an older glibc, whereas a more up-to-date version does. Adding the direct include to file.c resolves the issue and does not cause problems for more recent glibc. Fixes: 50109b5a03b4 ("um: Add support for DISCARD in the UBD Driver") Cc: Brendan Higgins Signed-off-by: Alan Maguire Reviewed-by: Brendan Higgins Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/os-Linux/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index fbda10535dab..5c819f89b8c2 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From b58c4e96192ee7c47d5c67853b1557306cfa0e7f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 Mar 2020 15:07:35 +0200 Subject: hostfs: Use kasprintf() instead of fixed buffer formatting Improve readability and maintainability by replacing a hardcoded string allocation and formatting by the use of the kasprintf() helper. Signed-off-by: Andy Shevchenko Signed-off-by: Richard Weinberger --- fs/hostfs/hostfs_kern.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6b8c49076bb..c070c0d8e3e9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -139,8 +139,8 @@ static char *inode_name(struct inode *ino) static char *follow_link(char *link) { - int len, n; char *name, *resolved, *end; + int n; name = __getname(); if (!name) { @@ -164,15 +164,13 @@ static char *follow_link(char *link) return name; *(end + 1) = '\0'; - len = strlen(link) + strlen(name) + 1; - resolved = kmalloc(len, GFP_KERNEL); + resolved = kasprintf(GFP_KERNEL, "%s%s", link, name); if (resolved == NULL) { n = -ENOMEM; goto out_free; } - sprintf(resolved, "%s%s", link, name); __putname(name); kfree(link); return resolved; @@ -921,18 +919,16 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; - /* NULL is printed as by sprintf: avoid that. */ + /* NULL is printed as '(null)' by printf(): avoid that. */ if (req_root == NULL) req_root = ""; err = -ENOMEM; sb->s_fs_info = host_root_path = - kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL); + kasprintf(GFP_KERNEL, "%s/%s", root_ino, req_root); if (host_root_path == NULL) goto out; - sprintf(host_root_path, "%s/%s", root_ino, req_root); - root_inode = new_inode(sb); if (!root_inode) goto out; -- cgit v1.2.3 From f185063bff914e589b50f1b711fc42218c4790c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:44 +0100 Subject: um: Move timer-internal.h to non-shared This file isn't really shared, it's only used on the kernel side, not on the user side. Remove the include from the user-side and move the file to a better place. While at it, rename it to time-internal.h, it's not really just timers but all kinds of things related to timekeeping. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/linux/time-internal.h | 76 +++++++++++++++++++++++++++++++++ arch/um/include/shared/timer-internal.h | 76 --------------------------------- arch/um/kernel/process.c | 2 +- arch/um/kernel/skas/syscall.c | 2 +- arch/um/kernel/time.c | 2 +- arch/um/os-Linux/time.c | 1 - 6 files changed, 79 insertions(+), 80 deletions(-) create mode 100644 arch/um/include/linux/time-internal.h delete mode 100644 arch/um/include/shared/timer-internal.h diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h new file mode 100644 index 000000000000..2d2d13c9b46f --- /dev/null +++ b/arch/um/include/linux/time-internal.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 - 2014 Cisco Systems + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#ifndef __TIMER_INTERNAL_H__ +#define __TIMER_INTERNAL_H__ + +#define TIMER_MULTIPLIER 256 +#define TIMER_MIN_DELTA 500 + +enum time_travel_mode { + TT_MODE_OFF, + TT_MODE_BASIC, + TT_MODE_INFCPU, +}; + +enum time_travel_timer_mode { + TT_TMR_DISABLED, + TT_TMR_ONESHOT, + TT_TMR_PERIODIC, +}; + +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +extern enum time_travel_mode time_travel_mode; +extern unsigned long long time_travel_time; +extern enum time_travel_timer_mode time_travel_timer_mode; +extern unsigned long long time_travel_timer_expiry; +extern unsigned long long time_travel_timer_interval; + +static inline void time_travel_set_time(unsigned long long ns) +{ + time_travel_time = ns; +} + +static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) +{ + time_travel_timer_mode = mode; +} + +static inline void time_travel_set_timer_expiry(unsigned long long expiry) +{ + time_travel_timer_expiry = expiry; +} + +static inline void time_travel_set_timer_interval(unsigned long long interval) +{ + time_travel_timer_interval = interval; +} +#else +#define time_travel_mode TT_MODE_OFF +#define time_travel_time 0 +#define time_travel_timer_expiry 0 +#define time_travel_timer_interval 0 + +static inline void time_travel_set_time(unsigned long long ns) +{ +} + +static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) +{ +} + +static inline void time_travel_set_timer_expiry(unsigned long long expiry) +{ +} + +static inline void time_travel_set_timer_interval(unsigned long long interval) +{ +} + +#define time_travel_timer_mode TT_TMR_DISABLED +#endif + +#endif diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h deleted file mode 100644 index 2d2d13c9b46f..000000000000 --- a/arch/um/include/shared/timer-internal.h +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 - 2014 Cisco Systems - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __TIMER_INTERNAL_H__ -#define __TIMER_INTERNAL_H__ - -#define TIMER_MULTIPLIER 256 -#define TIMER_MIN_DELTA 500 - -enum time_travel_mode { - TT_MODE_OFF, - TT_MODE_BASIC, - TT_MODE_INFCPU, -}; - -enum time_travel_timer_mode { - TT_TMR_DISABLED, - TT_TMR_ONESHOT, - TT_TMR_PERIODIC, -}; - -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -extern enum time_travel_mode time_travel_mode; -extern unsigned long long time_travel_time; -extern enum time_travel_timer_mode time_travel_timer_mode; -extern unsigned long long time_travel_timer_expiry; -extern unsigned long long time_travel_timer_interval; - -static inline void time_travel_set_time(unsigned long long ns) -{ - time_travel_time = ns; -} - -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) -{ - time_travel_timer_mode = mode; -} - -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ - time_travel_timer_expiry = expiry; -} - -static inline void time_travel_set_timer_interval(unsigned long long interval) -{ - time_travel_timer_interval = interval; -} -#else -#define time_travel_mode TT_MODE_OFF -#define time_travel_time 0 -#define time_travel_timer_expiry 0 -#define time_travel_timer_interval 0 - -static inline void time_travel_set_time(unsigned long long ns) -{ -} - -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) -{ -} - -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ -} - -static inline void time_travel_set_timer_interval(unsigned long long interval) -{ -} - -#define time_travel_timer_mode TT_TMR_DISABLED -#endif - -#endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 56a094182bf5..0274ebb70977 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include /* * This is a per-cpu array. A processor only modifies its entry and it only diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 40d90dddf3f1..2e82820f7d29 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include void handle_syscall(struct uml_pt_regs *r) { diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 94ea87bd231c..a849d391e909 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 432f8e1f55c2..90f6de224c70 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -14,7 +14,6 @@ #include #include #include -#include static timer_t event_high_res_timer = 0; -- cgit v1.2.3 From 4b786e24ca80a492736b359b3d1a8d07612a78e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:45 +0100 Subject: um: time-travel: Rewrite as an event scheduler Instead of tracking all the various timer configurations, modify the time-travel mode to have an event scheduler and use a timer event on the scheduler to handle the different timer configurations. This doesn't change the function right now, but it prepares the code for having different kinds of events in the future (i.e. interrupts coming from other devices that are part of co-simulation.) While at it, also move time_travel_sleep() to time.c to reduce the externally visible API surface. Also, we really should mark time-travel as incompatible with SMP, even if UML doesn't support SMP yet. Finally, I noticed a bug while developing this - if we move time forward due to consuming time while reading the clock, we might move across the next event and that would cause us to go backward in time when we then handle that event. Fix that by invoking the whole event machine in this case, but in order to simplify this, make reading the clock only cost something when interrupts are not disabled. Otherwise, we'd have to hook into the interrupt delivery machinery etc. and that's somewhat intrusive. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/Kconfig | 1 + arch/um/include/linux/time-internal.h | 65 +++------- arch/um/kernel/process.c | 37 ------ arch/um/kernel/time.c | 221 ++++++++++++++++++++++++++++++++-- 4 files changed, 229 insertions(+), 95 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0917f8443c28..817a4c838a06 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -191,6 +191,7 @@ config UML_TIME_TRAVEL_SUPPORT prompt "Support time-travel mode (e.g. for test execution)" # inf-cpu mode is incompatible with the benchmarking depends on !RAID6_PQ_BENCHMARK + depends on !SMP help Enable this option to support time travel inside the UML instance. diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 2d2d13c9b46f..eb1f84616edb 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -6,6 +6,7 @@ #ifndef __TIMER_INTERNAL_H__ #define __TIMER_INTERNAL_H__ +#include #define TIMER_MULTIPLIER 256 #define TIMER_MIN_DELTA 500 @@ -16,61 +17,35 @@ enum time_travel_mode { TT_MODE_INFCPU, }; -enum time_travel_timer_mode { - TT_TMR_DISABLED, - TT_TMR_ONESHOT, - TT_TMR_PERIODIC, +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +struct time_travel_event { + unsigned long long time; + void (*fn)(struct time_travel_event *d); + struct list_head list; + bool pending, onstack; }; -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT extern enum time_travel_mode time_travel_mode; -extern unsigned long long time_travel_time; -extern enum time_travel_timer_mode time_travel_timer_mode; -extern unsigned long long time_travel_timer_expiry; -extern unsigned long long time_travel_timer_interval; -static inline void time_travel_set_time(unsigned long long ns) -{ - time_travel_time = ns; -} +void time_travel_sleep(unsigned long long duration); -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) +static inline void +time_travel_set_event_fn(struct time_travel_event *e, + void (*fn)(struct time_travel_event *d)) { - time_travel_timer_mode = mode; -} - -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ - time_travel_timer_expiry = expiry; -} - -static inline void time_travel_set_timer_interval(unsigned long long interval) -{ - time_travel_timer_interval = interval; + e->fn = fn; } #else -#define time_travel_mode TT_MODE_OFF -#define time_travel_time 0 -#define time_travel_timer_expiry 0 -#define time_travel_timer_interval 0 - -static inline void time_travel_set_time(unsigned long long ns) -{ -} - -static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) -{ -} +struct time_travel_event { +}; -static inline void time_travel_set_timer_expiry(unsigned long long expiry) -{ -} +#define time_travel_mode TT_MODE_OFF -static inline void time_travel_set_timer_interval(unsigned long long interval) +static inline void time_travel_sleep(unsigned long long duration) { } -#define time_travel_timer_mode TT_TMR_DISABLED -#endif - -#endif +/* this is a macro so the event/function need not exist */ +#define time_travel_set_event_fn(e, fn) do {} while (0) +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 0274ebb70977..cbe33af2a880 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -203,43 +203,6 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } -static void time_travel_sleep(unsigned long long duration) -{ - unsigned long long next = time_travel_time + duration; - - if (time_travel_mode != TT_MODE_INFCPU) - os_timer_disable(); - - while (time_travel_timer_mode == TT_TMR_PERIODIC && - time_travel_timer_expiry < time_travel_time) - time_travel_set_timer_expiry(time_travel_timer_expiry + - time_travel_timer_interval); - - if (time_travel_timer_mode != TT_TMR_DISABLED && - time_travel_timer_expiry < next) { - if (time_travel_timer_mode == TT_TMR_ONESHOT) - time_travel_set_timer_mode(TT_TMR_DISABLED); - /* - * In basic mode, time_travel_time will be adjusted in - * the timer IRQ handler so it works even when the signal - * comes from the OS timer, see there. - */ - if (time_travel_mode != TT_MODE_BASIC) - time_travel_set_time(time_travel_timer_expiry); - - deliver_alarm(); - } else { - time_travel_set_time(next); - } - - if (time_travel_mode != TT_MODE_INFCPU) { - if (time_travel_timer_mode == TT_TMR_PERIODIC) - os_timer_set_interval(time_travel_timer_interval); - else if (time_travel_timer_mode == TT_TMR_ONESHOT) - os_timer_one_shot(time_travel_timer_expiry - next); - } -} - static void um_idle_sleep(void) { unsigned long long duration = UM_NSEC_PER_SEC; diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index a849d391e909..cdebe96308d7 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2012-2014 Cisco Systems * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2019 Intel Corporation */ #include @@ -23,16 +24,201 @@ #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT enum time_travel_mode time_travel_mode; -unsigned long long time_travel_time; -enum time_travel_timer_mode time_travel_timer_mode; -unsigned long long time_travel_timer_expiry; -unsigned long long time_travel_timer_interval; static bool time_travel_start_set; static unsigned long long time_travel_start; -#else +static unsigned long long time_travel_time; +static LIST_HEAD(time_travel_events); +static unsigned long long time_travel_timer_interval; +static unsigned long long time_travel_next_event; +static struct time_travel_event time_travel_timer_event; + +static void time_travel_set_time(unsigned long long ns) +{ + if (unlikely(ns < time_travel_time)) + panic("time-travel: time goes backwards %lld -> %lld\n", + time_travel_time, ns); + time_travel_time = ns; +} + +static struct time_travel_event *time_travel_first_event(void) +{ + return list_first_entry_or_null(&time_travel_events, + struct time_travel_event, + list); +} + +static void __time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + struct time_travel_event *tmp; + bool inserted = false; + + if (WARN(time_travel_mode == TT_MODE_BASIC && + e != &time_travel_timer_event, + "only timer events can be handled in basic mode")) + return; + + if (e->pending) + return; + + e->pending = true; + e->time = time; + + list_for_each_entry(tmp, &time_travel_events, list) { + /* + * Add the new entry before one with higher time, + * or if they're equal and both on stack, because + * in that case we need to unwind the stack in the + * right order, and the later event (timer sleep + * or such) must be dequeued first. + */ + if ((tmp->time > e->time) || + (tmp->time == e->time && tmp->onstack && e->onstack)) { + list_add_tail(&e->list, &tmp->list); + inserted = true; + break; + } + } + + if (!inserted) + list_add_tail(&e->list, &time_travel_events); + + tmp = time_travel_first_event(); + time_travel_next_event = tmp->time; +} + +static void time_travel_add_event(struct time_travel_event *e, + unsigned long long time) +{ + if (WARN_ON(!e->fn)) + return; + + __time_travel_add_event(e, time); +} + +void time_travel_periodic_timer(struct time_travel_event *e) +{ + time_travel_add_event(&time_travel_timer_event, + time_travel_time + time_travel_timer_interval); + deliver_alarm(); +} + +static void time_travel_deliver_event(struct time_travel_event *e) +{ + /* this is basically just deliver_alarm(), handles IRQs itself */ + e->fn(e); +} + +static bool time_travel_del_event(struct time_travel_event *e) +{ + if (!e->pending) + return false; + list_del(&e->list); + e->pending = false; + return true; +} + +static void time_travel_update_time(unsigned long long next, bool retearly) +{ + struct time_travel_event ne = { + .onstack = true, + }; + struct time_travel_event *e; + bool finished = retearly; + + /* add it without a handler - we deal with that specifically below */ + __time_travel_add_event(&ne, next); + + do { + e = time_travel_first_event(); + + BUG_ON(!e); + time_travel_set_time(e->time); + + /* new events may have been inserted while we were waiting */ + if (e == time_travel_first_event()) { + BUG_ON(!time_travel_del_event(e)); + BUG_ON(time_travel_time != e->time); + + if (e == &ne) { + finished = true; + } else { + if (e->onstack) + panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n", + time_travel_time, e->time, e); + time_travel_deliver_event(e); + } + } + } while (!finished); + + time_travel_del_event(&ne); +} + +static void time_travel_oneshot_timer(struct time_travel_event *e) +{ + deliver_alarm(); +} + +void time_travel_sleep(unsigned long long duration) +{ + unsigned long long next = time_travel_time + duration; + + if (time_travel_mode == TT_MODE_BASIC) + os_timer_disable(); + + time_travel_update_time(next, true); + + if (time_travel_mode == TT_MODE_BASIC && + time_travel_timer_event.pending) { + if (time_travel_timer_event.fn == time_travel_periodic_timer) { + /* + * This is somewhat wrong - we should get the first + * one sooner like the os_timer_one_shot() below... + */ + os_timer_set_interval(time_travel_timer_interval); + } else { + os_timer_one_shot(time_travel_timer_event.time - next); + } + } +} + +static void time_travel_handle_real_alarm(void) +{ + time_travel_set_time(time_travel_next_event); + + time_travel_del_event(&time_travel_timer_event); + + if (time_travel_timer_event.fn == time_travel_periodic_timer) + time_travel_add_event(&time_travel_timer_event, + time_travel_time + + time_travel_timer_interval); +} + +static void time_travel_set_interval(unsigned long long interval) +{ + time_travel_timer_interval = interval; +} +#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ #define time_travel_start_set 0 #define time_travel_start 0 +#define time_travel_time 0 + +static inline void time_travel_update_time(unsigned long long ns, bool retearly) +{ +} + +static inline void time_travel_handle_real_alarm(void) +{ +} + +static void time_travel_set_interval(unsigned long long interval) +{ +} + +/* these are empty macros so the struct/fn need not exist */ +#define time_travel_add_event(e, time) do { } while (0) +#define time_travel_del_event(e) do { } while (0) #endif void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) @@ -48,7 +234,7 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) * never get any real signals from the OS. */ if (time_travel_mode == TT_MODE_BASIC) - time_travel_set_time(time_travel_timer_expiry); + time_travel_handle_real_alarm(); local_irq_save(flags); do_IRQ(TIMER_IRQ, regs); @@ -58,7 +244,7 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { if (time_travel_mode != TT_MODE_OFF) - time_travel_set_timer_mode(TT_TMR_DISABLED); + time_travel_del_event(&time_travel_timer_event); if (time_travel_mode != TT_MODE_INFCPU) os_timer_disable(); @@ -71,9 +257,12 @@ static int itimer_set_periodic(struct clock_event_device *evt) unsigned long long interval = NSEC_PER_SEC / HZ; if (time_travel_mode != TT_MODE_OFF) { - time_travel_set_timer_mode(TT_TMR_PERIODIC); - time_travel_set_timer_expiry(time_travel_time + interval); - time_travel_set_timer_interval(interval); + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_periodic_timer); + time_travel_set_interval(interval); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + interval); } if (time_travel_mode != TT_MODE_INFCPU) @@ -88,8 +277,11 @@ static int itimer_next_event(unsigned long delta, delta += 1; if (time_travel_mode != TT_MODE_OFF) { - time_travel_set_timer_mode(TT_TMR_ONESHOT); - time_travel_set_timer_expiry(time_travel_time + delta); + time_travel_del_event(&time_travel_timer_event); + time_travel_set_event_fn(&time_travel_timer_event, + time_travel_oneshot_timer); + time_travel_add_event(&time_travel_timer_event, + time_travel_time + delta); } if (time_travel_mode != TT_MODE_INFCPU) @@ -144,7 +336,10 @@ static u64 timer_read(struct clocksource *cs) * exact requested sleep amount, e.g. python's socket server, * see https://bugs.python.org/issue37026. */ - time_travel_set_time(time_travel_time + TIMER_MULTIPLIER); + if (!irqs_disabled()) + time_travel_update_time(time_travel_time + + TIMER_MULTIPLIER, + false); return time_travel_time / TIMER_MULTIPLIER; } -- cgit v1.2.3 From dd9ada5627245a3441ebde00736dd63d09acc222 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:46 +0100 Subject: um: virtio: Implement VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS Implement in-band notifications that are necessary for running vhost-user devices under externally synchronized time-travel mode (which is in a follow-up patch). This feature makes what usually should be eventfd notifications in-band messages. We'll prefer this feature, under the assumption that only a few (simulation) devices will ever support it, since it's not very efficient. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/vhost_user.h | 12 ++++--- arch/um/drivers/virtio_uml.c | 85 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 22 deletions(-) diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 45ff5ea22fea..6c71b6005177 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,9 +10,10 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 -#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 /* Vring state index masks */ #define VHOST_USER_VRING_INDEX_MASK 0xff #define VHOST_USER_VRING_POLL_MASK BIT(8) @@ -24,7 +25,8 @@ /* Supported protocol features */ #define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ - BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)) + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) enum vhost_user_request { VHOST_USER_GET_FEATURES = 1, @@ -52,12 +54,14 @@ enum vhost_user_request { VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, VHOST_USER_SET_CONFIG = 25, + VHOST_USER_VRING_KICK = 35, }; enum vhost_user_slave_request { VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, }; struct vhost_user_header { diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 023ced2250ea..9b4c5b7c0f0d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -53,6 +53,7 @@ struct virtio_uml_device { struct virtio_device vdev; struct platform_device *pdev; + spinlock_t sock_lock; int sock, req_fd; u64 features; u64 protocol_features; @@ -189,6 +190,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, int *fds, size_t num_fds) { size_t size = sizeof(msg->header) + msg->header.size; + unsigned long flags; bool request_ack; int rc; @@ -207,24 +209,28 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; + spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) - return rc; + goto out; if (request_ack) { uint64_t status; rc = vhost_user_recv_u64(vu_dev, &status); if (rc) - return rc; + goto out; if (status) { vu_err(vu_dev, "slave reports error: %llu\n", status); - return -EIO; + rc = -EIO; + goto out; } } - return 0; +out: + spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + return rc; } static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, @@ -324,6 +330,7 @@ static void vhost_user_reply(struct virtio_uml_device *vu_dev, static irqreturn_t vu_req_interrupt(int irq, void *data) { struct virtio_uml_device *vu_dev = data; + struct virtqueue *vq; int response = 1; struct { struct vhost_user_msg msg; @@ -343,6 +350,15 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) virtio_config_changed(&vu_dev->vdev); response = 0; break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vring_interrupt(0 /* ignored */, vq); + break; + } + } + break; case VHOST_USER_SLAVE_IOTLB_MSG: /* not supported - VIRTIO_F_IOMMU_PLATFORM */ case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: @@ -684,6 +700,15 @@ static bool vu_notify(struct virtqueue *vq) const uint64_t n = 1; int rc; + if (info->kick_fd < 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, + vq->index, 0) == 0; + } + do { rc = os_write_file(info->kick_fd, &n, sizeof(n)); } while (rc == -EINTR); @@ -749,10 +774,13 @@ static void vu_del_vq(struct virtqueue *vq) { struct virtio_uml_vq_info *info = vq->priv; - um_free_irq(VIRTIO_IRQ, vq); + if (info->call_fd >= 0) { + um_free_irq(VIRTIO_IRQ, vq); + os_close_file(info->call_fd); + } - os_close_file(info->call_fd); - os_close_file(info->kick_fd); + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); vring_del_virtqueue(vq); kfree(info); @@ -782,6 +810,15 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, int call_fds[2]; int rc; + /* no call FD needed/desired in this case */ + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + info->call_fd = -1; + return 0; + } + /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ rc = os_pipe(call_fds, true, true); if (rc < 0) @@ -838,10 +875,15 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, vq->priv = info; num = virtqueue_get_vring_size(vq); - rc = os_eventfd(0, 0); - if (rc < 0) - goto error_kick; - info->kick_fd = rc; + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { + info->kick_fd = -1; + } else { + rc = os_eventfd(0, 0); + if (rc < 0) + goto error_kick; + info->kick_fd = rc; + } rc = vu_setup_vq_call_fd(vu_dev, vq); if (rc) @@ -866,10 +908,13 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, return vq; error_setup: - um_free_irq(VIRTIO_IRQ, vq); - os_close_file(info->call_fd); + if (info->call_fd >= 0) { + um_free_irq(VIRTIO_IRQ, vq); + os_close_file(info->call_fd); + } error_call: - os_close_file(info->kick_fd); + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); error_kick: vring_del_virtqueue(vq); error_create: @@ -908,10 +953,12 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, list_for_each_entry(vq, &vdev->vqs, list) { struct virtio_uml_vq_info *info = vq->priv; - rc = vhost_user_set_vring_kick(vu_dev, vq->index, - info->kick_fd); - if (rc) - goto error_setup; + if (info->kick_fd >= 0) { + rc = vhost_user_set_vring_kick(vu_dev, vq->index, + info->kick_fd); + if (rc) + goto error_setup; + } rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); if (rc) @@ -1008,6 +1055,8 @@ static int virtio_uml_probe(struct platform_device *pdev) return rc; vu_dev->sock = rc; + spin_lock_init(&vu_dev->sock_lock); + rc = vhost_user_init(vu_dev); if (rc) goto error_init; -- cgit v1.2.3 From 88ce642492339f49a0b391af40e5798c08948e49 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:47 +0100 Subject: um: Implement time-travel=ext This implements synchronized time-travel mode which - using a special application on a unix socket - lets multiple machines take part in a time-travelling simulation together. The protocol for the unix domain socket is defined in the new file include/uapi/linux/um_timetravel.h. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 68 ++++++- arch/um/include/linux/time-internal.h | 27 +++ arch/um/include/shared/os.h | 1 + arch/um/kernel/skas/syscall.c | 3 +- arch/um/kernel/time.c | 323 ++++++++++++++++++++++++++++++++-- arch/um/os-Linux/file.c | 30 ++++ include/uapi/linux/um_timetravel.h | 128 ++++++++++++++ 7 files changed, 563 insertions(+), 17 deletions(-) create mode 100644 include/uapi/linux/um_timetravel.h diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 9b4c5b7c0f0d..be54d368e73d 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +65,11 @@ struct virtio_uml_device { struct virtio_uml_vq_info { int kick_fd, call_fd; char name[32]; +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + struct virtqueue *vq; + vq_callback_t *callback; + struct time_travel_event defer; +#endif }; extern unsigned long long physmem_size, highmem; @@ -118,10 +124,27 @@ static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) static int vhost_user_recv(struct virtio_uml_device *vu_dev, int fd, struct vhost_user_msg *msg, - size_t max_payload_size) + size_t max_payload_size, bool wait) { size_t size; - int rc = vhost_user_recv_header(fd, msg); + int rc; + + /* + * In virtio time-travel mode, we're handling all the vhost-user + * FDs by polling them whenever appropriate. However, we may get + * into a situation where we're sending out an interrupt message + * to a device (e.g. a net device) and need to handle a simulation + * time message while doing so, e.g. one that tells us to update + * our idea of how long we can run without scheduling. + * + * Thus, we need to not just read() from the given fd, but need + * to also handle messages for the simulation time - this function + * does that for us while waiting for the given fd to be readable. + */ + if (wait) + time_travel_wait_readable(fd); + + rc = vhost_user_recv_header(fd, msg); if (rc == -ECONNRESET && vu_dev->registered) { struct virtio_uml_platform_data *pdata; @@ -143,7 +166,8 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { - int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size); + int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, + max_payload_size, true); if (rc) return rc; @@ -173,7 +197,8 @@ static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { - int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, max_payload_size); + int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, + max_payload_size, false); if (rc) return rc; @@ -700,6 +725,8 @@ static bool vu_notify(struct virtqueue *vq) const uint64_t n = 1; int rc; + time_travel_propagate_time(); + if (info->kick_fd < 0) { struct virtio_uml_device *vu_dev; @@ -847,6 +874,23 @@ out: return rc; } +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +static void vu_defer_irq_handle(struct time_travel_event *d) +{ + struct virtio_uml_vq_info *info; + + info = container_of(d, struct virtio_uml_vq_info, defer); + info->callback(info->vq); +} + +static void vu_defer_irq_callback(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + + time_travel_add_irq_event(&info->defer); +} +#endif + static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, unsigned index, vq_callback_t *callback, const char *name, bool ctx) @@ -866,6 +910,19 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, pdev->id, name); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + /* + * When we get an interrupt, we must bounce it through the simulation + * calendar (the simtime device), except for the simtime device itself + * since that's part of the simulation control. + */ + if (time_travel_mode == TT_MODE_EXTERNAL && callback) { + info->callback = callback; + callback = vu_defer_irq_callback; + time_travel_set_event_fn(&info->defer, vu_defer_irq_handle); + } +#endif + vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, ctx, vu_notify, callback, info->name); if (!vq) { @@ -874,6 +931,9 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, } vq->priv = info; num = virtqueue_get_vring_size(vq); +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + info->vq = vq; +#endif if (vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index eb1f84616edb..e21655926f08 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -15,6 +15,7 @@ enum time_travel_mode { TT_MODE_OFF, TT_MODE_BASIC, TT_MODE_INFCPU, + TT_MODE_EXTERNAL, }; #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT @@ -35,6 +36,24 @@ time_travel_set_event_fn(struct time_travel_event *e, { e->fn = fn; } + +void __time_travel_propagate_time(void); + +static inline void time_travel_propagate_time(void) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_propagate_time(); +} + +void __time_travel_wait_readable(int fd); + +static inline void time_travel_wait_readable(int fd) +{ + if (time_travel_mode == TT_MODE_EXTERNAL) + __time_travel_wait_readable(fd); +} + +void time_travel_add_irq_event(struct time_travel_event *e); #else struct time_travel_event { }; @@ -47,5 +66,13 @@ static inline void time_travel_sleep(unsigned long long duration) /* this is a macro so the event/function need not exist */ #define time_travel_set_event_fn(e, fn) do {} while (0) + +static inline void time_travel_propagate_time(void) +{ +} + +static inline void time_travel_wait_readable(int fd) +{ +} #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ #endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 0f30204b6afa..f467d28fc0b4 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -181,6 +181,7 @@ extern int os_falloc_punch(int fd, unsigned long long offset, int count); extern int os_eventfd(unsigned int initval, int flags); extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, unsigned int fds_num); +int os_poll(unsigned int n, const int *fds); /* start_up.c */ extern void os_early_checks(void); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 2e82820f7d29..0a12d5a09217 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -24,7 +24,8 @@ void handle_syscall(struct uml_pt_regs *r) * went to sleep, even if said userspace interacts with the kernel in * various ways. */ - if (time_travel_mode == TT_MODE_INFCPU) + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) schedule(); /* Initialize the syscall number and default return value. */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index cdebe96308d7..15c4825b857e 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -20,10 +20,12 @@ #include #include #include +#include #include #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT enum time_travel_mode time_travel_mode; +EXPORT_SYMBOL_GPL(time_travel_mode); static bool time_travel_start_set; static unsigned long long time_travel_start; @@ -32,6 +34,12 @@ static LIST_HEAD(time_travel_events); static unsigned long long time_travel_timer_interval; static unsigned long long time_travel_next_event; static struct time_travel_event time_travel_timer_event; +static int time_travel_ext_fd = -1; +static unsigned int time_travel_ext_waiting; +static bool time_travel_ext_prev_request_valid; +static unsigned long long time_travel_ext_prev_request; +static bool time_travel_ext_free_until_valid; +static unsigned long long time_travel_ext_free_until; static void time_travel_set_time(unsigned long long ns) { @@ -41,6 +49,199 @@ static void time_travel_set_time(unsigned long long ns) time_travel_time = ns; } +enum time_travel_message_handling { + TTMH_IDLE, + TTMH_POLL, + TTMH_READ, +}; + +static void time_travel_handle_message(struct um_timetravel_msg *msg, + enum time_travel_message_handling mode) +{ + struct um_timetravel_msg resp = { + .op = UM_TIMETRAVEL_ACK, + }; + int ret; + + /* + * Poll outside the locked section (if we're not called to only read + * the response) so we can get interrupts for e.g. virtio while we're + * here, but then we need to lock to not get interrupted between the + * read of the message and write of the ACK. + */ + if (mode != TTMH_READ) { + while (os_poll(1, &time_travel_ext_fd) != 0) { + if (mode == TTMH_IDLE) { + BUG_ON(!irqs_disabled()); + local_irq_enable(); + local_irq_disable(); + } + } + } + + ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); + + if (ret == 0) + panic("time-travel external link is broken\n"); + if (ret != sizeof(*msg)) + panic("invalid time-travel message - %d bytes\n", ret); + + switch (msg->op) { + default: + WARN_ONCE(1, "time-travel: unexpected message %lld\n", + (unsigned long long)msg->op); + break; + case UM_TIMETRAVEL_ACK: + return; + case UM_TIMETRAVEL_RUN: + time_travel_set_time(msg->time); + break; + case UM_TIMETRAVEL_FREE_UNTIL: + time_travel_ext_free_until_valid = true; + time_travel_ext_free_until = msg->time; + break; + } + + os_write_file(time_travel_ext_fd, &resp, sizeof(resp)); +} + +static u64 time_travel_ext_req(u32 op, u64 time) +{ + static int seq; + int mseq = ++seq; + struct um_timetravel_msg msg = { + .op = op, + .time = time, + .seq = mseq, + }; + unsigned long flags; + + /* + * We need to save interrupts here and only restore when we + * got the ACK - otherwise we can get interrupted and send + * another request while we're still waiting for an ACK, but + * the peer doesn't know we got interrupted and will send + * the ACKs in the same order as the message, but we'd need + * to see them in the opposite order ... + * + * This wouldn't matter *too* much, but some ACKs carry the + * current time (for UM_TIMETRAVEL_GET) and getting another + * ACK without a time would confuse us a lot! + * + * The sequence number assignment that happens here lets us + * debug such message handling issues more easily. + */ + local_irq_save(flags); + os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); + + while (msg.op != UM_TIMETRAVEL_ACK) + time_travel_handle_message(&msg, TTMH_READ); + + if (msg.seq != mseq) + panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", + msg.op, msg.seq, mseq, msg.time); + + if (op == UM_TIMETRAVEL_GET) + time_travel_set_time(msg.time); + local_irq_restore(flags); + + return msg.time; +} + +void __time_travel_wait_readable(int fd) +{ + int fds[2] = { fd, time_travel_ext_fd }; + int ret; + + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + while ((ret = os_poll(2, fds))) { + struct um_timetravel_msg msg; + + if (ret == 1) + time_travel_handle_message(&msg, TTMH_READ); + } +} +EXPORT_SYMBOL_GPL(__time_travel_wait_readable); + +static void time_travel_ext_update_request(unsigned long long time) +{ + if (time_travel_mode != TT_MODE_EXTERNAL) + return; + + /* asked for exactly this time previously */ + if (time_travel_ext_prev_request_valid && + time == time_travel_ext_prev_request) + return; + + time_travel_ext_prev_request = time; + time_travel_ext_prev_request_valid = true; + time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); +} + +void __time_travel_propagate_time(void) +{ + time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time); +} +EXPORT_SYMBOL_GPL(__time_travel_propagate_time); + +/* returns true if we must do a wait to the simtime device */ +static bool time_travel_ext_request(unsigned long long time) +{ + /* + * If we received an external sync point ("free until") then we + * don't have to request/wait for anything until then, unless + * we're already waiting. + */ + if (!time_travel_ext_waiting && time_travel_ext_free_until_valid && + time < time_travel_ext_free_until) + return false; + + time_travel_ext_update_request(time); + return true; +} + +static void time_travel_ext_wait(bool idle) +{ + struct um_timetravel_msg msg = { + .op = UM_TIMETRAVEL_ACK, + }; + + time_travel_ext_prev_request_valid = false; + time_travel_ext_waiting++; + + time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); + + /* + * Here we are deep in the idle loop, so we have to break out of the + * kernel abstraction in a sense and implement this in terms of the + * UML system waiting on the VQ interrupt while sleeping, when we get + * the signal it'll call time_travel_ext_vq_notify_done() completing the + * call. + */ + while (msg.op != UM_TIMETRAVEL_RUN) + time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL); + + time_travel_ext_waiting--; + + /* we might request more stuff while polling - reset when we run */ + time_travel_ext_prev_request_valid = false; +} + +static void time_travel_ext_get_time(void) +{ + time_travel_ext_req(UM_TIMETRAVEL_GET, -1); +} + +static void __time_travel_update_time(unsigned long long ns, bool idle) +{ + if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns)) + time_travel_ext_wait(idle); + else + time_travel_set_time(ns); +} + static struct time_travel_event *time_travel_first_event(void) { return list_first_entry_or_null(&time_travel_events, @@ -85,6 +286,7 @@ static void __time_travel_add_event(struct time_travel_event *e, list_add_tail(&e->list, &time_travel_events); tmp = time_travel_first_event(); + time_travel_ext_update_request(tmp->time); time_travel_next_event = tmp->time; } @@ -106,8 +308,21 @@ void time_travel_periodic_timer(struct time_travel_event *e) static void time_travel_deliver_event(struct time_travel_event *e) { - /* this is basically just deliver_alarm(), handles IRQs itself */ - e->fn(e); + if (e == &time_travel_timer_event) { + /* + * deliver_alarm() does the irq_enter/irq_exit + * by itself, so must handle it specially here + */ + e->fn(e); + } else { + unsigned long flags; + + local_irq_save(flags); + irq_enter(); + e->fn(e); + irq_exit(); + local_irq_restore(flags); + } } static bool time_travel_del_event(struct time_travel_event *e) @@ -119,13 +334,13 @@ static bool time_travel_del_event(struct time_travel_event *e) return true; } -static void time_travel_update_time(unsigned long long next, bool retearly) +static void time_travel_update_time(unsigned long long next, bool idle) { struct time_travel_event ne = { .onstack = true, }; struct time_travel_event *e; - bool finished = retearly; + bool finished = idle; /* add it without a handler - we deal with that specifically below */ __time_travel_add_event(&ne, next); @@ -134,7 +349,7 @@ static void time_travel_update_time(unsigned long long next, bool retearly) e = time_travel_first_event(); BUG_ON(!e); - time_travel_set_time(e->time); + __time_travel_update_time(e->time, idle); /* new events may have been inserted while we were waiting */ if (e == time_travel_first_event()) { @@ -150,11 +365,29 @@ static void time_travel_update_time(unsigned long long next, bool retearly) time_travel_deliver_event(e); } } - } while (!finished); + + e = time_travel_first_event(); + if (e) + time_travel_ext_update_request(e->time); + } while (ne.pending && !finished); time_travel_del_event(&ne); } +void time_travel_add_irq_event(struct time_travel_event *e) +{ + BUG_ON(time_travel_mode != TT_MODE_EXTERNAL); + + time_travel_ext_get_time(); + /* + * We could model interrupt latency here, for now just + * don't have any latency at all and request the exact + * same time (again) to run the interrupt... + */ + time_travel_add_event(e, time_travel_time); +} +EXPORT_SYMBOL_GPL(time_travel_add_irq_event); + static void time_travel_oneshot_timer(struct time_travel_event *e) { deliver_alarm(); @@ -199,6 +432,42 @@ static void time_travel_set_interval(unsigned long long interval) { time_travel_timer_interval = interval; } + +static int time_travel_connect_external(const char *socket) +{ + const char *sep; + unsigned long long id = (unsigned long long)-1; + int rc; + + if ((sep = strchr(socket, ':'))) { + char buf[25] = {}; + if (sep - socket > sizeof(buf) - 1) + goto invalid_number; + + memcpy(buf, socket, sep - socket); + if (kstrtoull(buf, 0, &id)) { +invalid_number: + panic("time-travel: invalid external ID in string '%s'\n", + socket); + return -EINVAL; + } + + socket = sep + 1; + } + + rc = os_connect_socket(socket); + if (rc < 0) { + panic("time-travel: failed to connect to external socket %s\n", + socket); + return rc; + } + + time_travel_ext_fd = rc; + + time_travel_ext_req(UM_TIMETRAVEL_START, id); + + return 1; +} #else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ #define time_travel_start_set 0 #define time_travel_start 0 @@ -216,6 +485,9 @@ static void time_travel_set_interval(unsigned long long interval) { } +/* fail link if this actually gets used */ +extern u64 time_travel_ext_req(u32 op, u64 time); + /* these are empty macros so the struct/fn need not exist */ #define time_travel_add_event(e, time) do { } while (0) #define time_travel_del_event(e) do { } while (0) @@ -246,7 +518,8 @@ static int itimer_shutdown(struct clock_event_device *evt) if (time_travel_mode != TT_MODE_OFF) time_travel_del_event(&time_travel_timer_event); - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) os_timer_disable(); return 0; @@ -265,7 +538,8 @@ static int itimer_set_periodic(struct clock_event_device *evt) time_travel_time + interval); } - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) os_timer_set_interval(interval); return 0; @@ -284,7 +558,8 @@ static int itimer_next_event(unsigned long delta, time_travel_time + delta); } - if (time_travel_mode != TT_MODE_INFCPU) + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL) return os_timer_one_shot(delta); return 0; @@ -335,8 +610,14 @@ static u64 timer_read(struct clocksource *cs) * stuck in loops that expect time to move more than the * exact requested sleep amount, e.g. python's socket server, * see https://bugs.python.org/issue37026. + * + * However, don't do that when we're in interrupt or such as + * then we might recurse into our own processing, and get to + * even more waiting, and that's not good - it messes up the + * "what do I do next" and onstack event we use to know when + * to return from time_travel_update_time(). */ - if (!irqs_disabled()) + if (!irqs_disabled() && !in_interrupt() && !in_softirq()) time_travel_update_time(time_travel_time + TIMER_MULTIPLIER, false); @@ -383,6 +664,8 @@ void read_persistent_clock64(struct timespec64 *ts) if (time_travel_start_set) nsecs = time_travel_start + time_travel_time; + else if (time_travel_mode == TT_MODE_EXTERNAL) + nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); else nsecs = os_persistent_clock_emulation(); @@ -399,7 +682,8 @@ void __init time_init(void) #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT unsigned long calibrate_delay_is_known(void) { - if (time_travel_mode == TT_MODE_INFCPU) + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) return 1; return 0; } @@ -413,6 +697,13 @@ int setup_time_travel(char *str) return 1; } + if (strncmp(str, "=ext:", 5) == 0) { + time_travel_mode = TT_MODE_EXTERNAL; + timer_clockevent.name = "time-travel-timer-external"; + timer_clocksource.name = "time-travel-clock-external"; + return time_travel_connect_external(str + 5); + } + if (!*str) { time_travel_mode = TT_MODE_BASIC; timer_clockevent.name = "time-travel-timer"; @@ -437,7 +728,15 @@ __uml_help(setup_time_travel, "are no wall clock timers, and any CPU processing happens - as seen from the\n" "guest - instantly. This can be useful for accurate simulation regardless of\n" "debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n" -"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"); +"easily lead to getting stuck (e.g. if anything in the system busy loops).\n" +"\n" +"time-travel=ext:[ID:]/path/to/socket\n" +"This enables time travel mode similar to =inf-cpu, except the system will\n" +"use the given socket to coordinate with a central scheduler, in order to\n" +"have more than one system simultaneously be on simulated time. The virtio\n" +"driver code in UML knows about this so you can also simulate networks and\n" +"devices using it, assuming the device has the right capabilities.\n" +"The optional ID is a 64-bit integer that's sent to the central scheduler.\n"); int setup_time_travel_start(char *str) { diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 5c819f89b8c2..26ecbd64c409 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include static void copy_stat(struct uml_stat *dst, const struct stat64 *src) @@ -665,3 +667,31 @@ int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, return -errno; return err; } + +int os_poll(unsigned int n, const int *fds) +{ + /* currently need 2 FDs at most so avoid dynamic allocation */ + struct pollfd pollfds[2] = {}; + unsigned int i; + int ret; + + if (n > ARRAY_SIZE(pollfds)) + return -EINVAL; + + for (i = 0; i < n; i++) { + pollfds[i].fd = fds[i]; + pollfds[i].events = POLLIN; + } + + ret = poll(pollfds, n, -1); + if (ret < 0) + return -errno; + + /* Return the index of the available FD */ + for (i = 0; i < n; i++) { + if (pollfds[i].revents) + return i; + } + + return -EIO; +} diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h new file mode 100644 index 000000000000..ca3238222b6d --- /dev/null +++ b/include/uapi/linux/um_timetravel.h @@ -0,0 +1,128 @@ +/* + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Copyright (C) 2019 Intel Corporation + */ +#ifndef _UAPI_LINUX_UM_TIMETRAVEL_H +#define _UAPI_LINUX_UM_TIMETRAVEL_H +#include + +/** + * struct um_timetravel_msg - UM time travel message + * + * This is the basic message type, going in both directions. + * + * This is the message passed between the host (user-mode Linux instance) + * and the calendar (the application on the other side of the socket) in + * order to implement common scheduling. + * + * Whenever UML has an event it will request runtime for it from the + * calendar, and then wait for its turn until it can run, etc. Note + * that it will only ever request the single next runtime, i.e. multiple + * REQUEST messages override each other. + */ +struct um_timetravel_msg { + /** + * @op: operation value from &enum um_timetravel_ops + */ + __u32 op; + + /** + * @seq: sequence number for the message - shall be reflected in + * the ACK response, and should be checked while processing + * the response to see if it matches + */ + __u32 seq; + + /** + * @time: time in nanoseconds + */ + __u64 time; +}; + +/** + * enum um_timetravel_ops - Operation codes + */ +enum um_timetravel_ops { + /** + * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, + * this usually doesn't carry any data in the 'time' field + * unless otherwise specified below + */ + UM_TIMETRAVEL_ACK = 0, + + /** + * @UM_TIMETRAVEL_START: initialize the connection, the time + * field contains an (arbitrary) ID to possibly be able + * to distinguish the connections. + */ + UM_TIMETRAVEL_START = 1, + + /** + * @UM_TIMETRAVEL_REQUEST: request to run at the given time + * (host -> calendar) + */ + UM_TIMETRAVEL_REQUEST = 2, + + /** + * @UM_TIMETRAVEL_WAIT: Indicate waiting for the previously requested + * runtime, new requests may be made while waiting (e.g. due to + * interrupts); the time field is ignored. The calendar must process + * this message and later send a %UM_TIMETRAVEL_RUN message when + * the host can run again. + * (host -> calendar) + */ + UM_TIMETRAVEL_WAIT = 3, + + /** + * @UM_TIMETRAVEL_GET: return the current time from the calendar in the + * ACK message, the time in the request message is ignored + * (host -> calendar) + */ + UM_TIMETRAVEL_GET = 4, + + /** + * @UM_TIMETRAVEL_UPDATE: time update to the calendar, must be sent e.g. + * before kicking an interrupt to another calendar + * (host -> calendar) + */ + UM_TIMETRAVEL_UPDATE = 5, + + /** + * @UM_TIMETRAVEL_RUN: run time request granted, current time is in + * the time field + * (calendar -> host) + */ + UM_TIMETRAVEL_RUN = 6, + + /** + * @UM_TIMETRAVEL_FREE_UNTIL: Enable free-running until the given time, + * this is a message from the calendar telling the host that it can + * freely do its own scheduling for anything before the indicated + * time. + * Note that if a calendar sends this message once, the host may + * assume that it will also do so in the future, if it implements + * wraparound semantics for the time field. + * (calendar -> host) + */ + UM_TIMETRAVEL_FREE_UNTIL = 7, + + /** + * @UM_TIMETRAVEL_GET_TOD: Return time of day, typically used once at + * boot by the virtual machines to get a synchronized time from + * the simulation. + */ + UM_TIMETRAVEL_GET_TOD = 8, +}; + +#endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From 0bc8fb4dda2b461491ec567b2333d13897780d8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:48 +0100 Subject: um: Implement ndelay/udelay in time-travel mode In external or inf-cpu time-travel mode, ndelay/udelay currently just waste CPU time since the simulation time doesn't advance. Implement them properly in this case. Note that the "if (time_travel_mode == ...)" parts compile out if CONFIG_UML_TIME_TRAVEL_SUPPORT isn't set, time_travel_mode is defined to TT_MODE_OFF in that case. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/Kbuild | 1 - arch/um/include/asm/delay.h | 30 ++++++++++++++++++++++++++++++ arch/um/include/linux/time-internal.h | 6 ++++++ arch/um/kernel/time.c | 6 ++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 arch/um/include/asm/delay.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index db7d9d4e30d8..8d435f8a6dec 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -3,7 +3,6 @@ generic-y += bpf_perf_event.h generic-y += bug.h generic-y += compat.h generic-y += current.h -generic-y += delay.h generic-y += device.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h new file mode 100644 index 000000000000..56fc2b8f2dd0 --- /dev/null +++ b/arch/um/include/asm/delay.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_DELAY_H +#define __UM_DELAY_H +#include +#include + +static inline void um_ndelay(unsigned long nsecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(nsecs); + return; + } + ndelay(nsecs); +} +#undef ndelay +#define ndelay um_ndelay + +static inline void um_udelay(unsigned long usecs) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { + time_travel_ndelay(1000 * usecs); + return; + } + udelay(usecs); +} +#undef udelay +#define udelay um_udelay +#endif /* __UM_DELAY_H */ diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index e21655926f08..f3b03d39a854 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -75,4 +75,10 @@ static inline void time_travel_wait_readable(int fd) { } #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ + +/* + * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, + * which is intentional since we really shouldn't link it in that case. + */ +void time_travel_ndelay(unsigned long nsec); #endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 15c4825b857e..25eaa6a0c658 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -374,6 +374,12 @@ static void time_travel_update_time(unsigned long long next, bool idle) time_travel_del_event(&ne); } +void time_travel_ndelay(unsigned long nsec) +{ + time_travel_update_time(time_travel_time + nsec, false); +} +EXPORT_SYMBOL(time_travel_ndelay); + void time_travel_add_irq_event(struct time_travel_event *e) { BUG_ON(time_travel_mode != TT_MODE_EXTERNAL); -- cgit v1.2.3 From 5bef0a153bf29150357ff60283315a933f05c994 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:49 +0100 Subject: um: Implement cpu_relax() as ndelay(1) for time-travel In time-travel mode, cpu_relax() currently does actual CPU relax, but that doesn't affect the simulation. Ideally, we wouldn't run anything that uses it in simulation, but if we actually have virtio devices combined with the same simulation it's possible. Implement cpu_relax() as ndelay(1) in this case, using time_travel_ndelay(1) directly to catch errors if this is used erroneously in builds that don't set CONFIG_UML_TIME_TRAVEL_SUPPORT. While at it, convert it to an __always_inline and also add that to rep_nop() like the original does now. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/x86/um/asm/processor.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 593d5f3902bd..478710384b34 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __UM_PROCESSOR_H #define __UM_PROCESSOR_H +#include /* include faultinfo structure */ #include @@ -21,12 +22,19 @@ #include /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) +static __always_inline void rep_nop(void) { __asm__ __volatile__("rep;nop": : :"memory"); } -#define cpu_relax() rep_nop() +static __always_inline void cpu_relax(void) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + time_travel_ndelay(1); + else + rep_nop(); +} #define task_pt_regs(t) (&(t)->thread.regs) -- cgit v1.2.3 From 3363179385629c1804ea846f4e72608c2201a81e Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 24 Jan 2020 14:14:01 -0800 Subject: um: Make CONFIG_STATIC_LINK actually static Currently, CONFIG_STATIC_LINK can be enabled with options which cannot be statically linked, namely UML_NET_VECTOR, UML_NET_VDE, and UML_NET_PCAP; this is because glibc tries to load NSS which does not support being statically linked. So make CONFIG_STATIC_LINK depend on !UML_NET_VECTOR && !UML_NET_VDE && !UML_NET_PCAP. Link: https://lore.kernel.org/lkml/f658f317-be54-ed75-8296-c373c2dcc697@cambridgegreys.com/#t Signed-off-by: Brendan Higgins Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/Kconfig | 8 +++++++- arch/um/drivers/Kconfig | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 817a4c838a06..96ab7026b037 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -62,9 +62,12 @@ config NR_CPUS source "arch/$(HEADER_ARCH)/um/Kconfig" +config FORBID_STATIC_LINK + bool + config STATIC_LINK bool "Force a static link" - default n + depends on !FORBID_STATIC_LINK help This option gives you the ability to force a static link of UML. Normally, UML is linked as a shared binary. This is inconvenient for @@ -73,6 +76,9 @@ config STATIC_LINK Additionally, this option enables using higher memory spaces (up to 2.75G) for UML. + NOTE: This option is incompatible with some networking features which + depend on features that require being dynamically loaded (like NSS). + config LD_SCRIPT_STATIC bool default y diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 72d417055782..9160ead56e33 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -234,6 +234,7 @@ config UML_NET_DAEMON config UML_NET_VECTOR bool "Vector I/O high performance network devices" depends on UML_NET + select FORBID_STATIC_LINK help This User-Mode Linux network driver uses multi-message send and receive functions. The host running the UML guest must have @@ -245,6 +246,7 @@ config UML_NET_VECTOR config UML_NET_VDE bool "VDE transport (obsolete)" depends on UML_NET + select FORBID_STATIC_LINK help This User-Mode Linux network transport allows one or more running UMLs on a single host to communicate with each other and also @@ -292,6 +294,7 @@ config UML_NET_MCAST config UML_NET_PCAP bool "pcap transport (obsolete)" depends on UML_NET + select FORBID_STATIC_LINK help The pcap transport makes a pcap packet stream on the host look like an ethernet device inside UML. This is useful for making -- cgit v1.2.3 From 237ce2e681d20e92fe17fc23a348b4f0d595e641 Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Sun, 16 Feb 2020 22:36:24 +0100 Subject: um: vector: Avoid NULL ptr deference if transport is unset When the transport option of a vec isn't set strncmp ends up being called on a NULL pointer. Better not do that. Signed-off-by: Sjoerd Simons Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_kern.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 9693dfca7651..8e8aac1d30e1 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -197,6 +197,9 @@ static int get_transport_options(struct arglist *def) long parsed; int result = 0; + if (transport == NULL) + return -EINVAL; + if (vector != NULL) { if (kstrtoul(vector, 10, &parsed) == 0) { if (parsed == 0) { -- cgit v1.2.3 From 4a7c46247f9c620c0390a15cb00b6ef9576b9c23 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 27 Mar 2020 19:36:25 +0000 Subject: um: Remove some unnecessary NULL checks in vector_user.c kfree() already checks for null pointers, so additional checking is unnecessary. Signed-off-by: Alex Dewar Signed-off-by: Richard Weinberger --- arch/um/drivers/vector_user.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index ddcd917be0af..aa28e9eecb7b 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -221,8 +221,7 @@ static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) return result; tap_cleanup: printk(UM_KERN_ERR "user_init_tap: init failed, error %d", fd); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } @@ -266,8 +265,7 @@ static struct vector_fds *user_init_hybrid_fds(struct arglist *ifspec) return result; hybrid_cleanup: printk(UM_KERN_ERR "user_init_hybrid: init failed"); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } @@ -344,10 +342,8 @@ static struct vector_fds *user_init_unix_fds(struct arglist *ifspec, int id) unix_cleanup: if (fd >= 0) os_close_file(fd); - if (remote_addr != NULL) - kfree(remote_addr); - if (result != NULL) - kfree(result); + kfree(remote_addr); + kfree(result); return NULL; } @@ -382,8 +378,7 @@ static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) return result; raw_cleanup: printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err); - if (result != NULL) - kfree(result); + kfree(result); return NULL; } -- cgit v1.2.3