diff options
84 files changed, 9077 insertions, 43 deletions
@@ -105,6 +105,24 @@ config CC_COVERAGE config CC_HAS_ASM_INLINE def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null) +config XEN + bool "Select U-Boot be run as a bootloader for XEN Virtual Machine" + help + Enabling this option will make U-Boot be run as a bootloader + for XEN [1] Virtual Machine. + + Xen is a virtual machine monitor (VMM) or a type-1 hypervisor with support + for para-virtualization. Xen can organize the safe execution of several + virtual machines on the same physical system with performance close to + native. It is used as the basis for a number of different commercial and + open source applications, such as: server virtualization, Infrastructure + as a Service (IaaS), desktop virtualization, security applications, + embedded and hardware appliances. + Xen has a special VM called Domain-0 that runs the Dom0 kernel and allows + Xen to use the device drivers for the Domain-0 kernel by default. + + [1] - https://xenproject.org/ + config DISTRO_DEFAULTS bool "Select defaults suitable for booting general purpose Linux distributions" select AUTO_COMPLETE diff --git a/Licenses/README b/Licenses/README index 486e18d0d80..c23ad216fc8 100644 --- a/Licenses/README +++ b/Licenses/README @@ -149,5 +149,6 @@ BSD 3-clause "New" or "Revised" License BSD-3-Clause Y bsd-3-clause.txt http:/ IBM PIBS (PowerPC Initialization and IBM-pibs ibm-pibs.txt Boot Software) license ISC License ISC Y isc.txt https://spdx.org/licenses/ISC +MIT License MIT Y mit.txt https://spdx.org/licenses/MIT.html SIL OPEN FONT LICENSE (OFL-1.1) OFL-1.1 Y OFL.txt https://spdx.org/licenses/OFL-1.1.html X11 License X11 x11.txt https://spdx.org/licenses/X11.html diff --git a/Licenses/mit.txt b/Licenses/mit.txt new file mode 100644 index 00000000000..25a55e054f7 --- /dev/null +++ b/Licenses/mit.txt @@ -0,0 +1,20 @@ +MIT License +Copyright (c) 2020 EPAM Systems Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MAINTAINERS b/MAINTAINERS index e35d5d4fcbf..17ac45587b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -148,6 +148,18 @@ F: include/configs/meson64_android.h F: doc/board/amlogic/ N: meson +ARM ASPEED +M: Ryan Chen <ryan_chen@aspeedtech.com> +M: Chia-Wei Wang <chiawei_wang@aspeedtech.com> +R: Aspeed BMC SW team <BMC-SW@aspeedtech.com> +S: Maintained +F: arch/arm/mach-aspeed/ +F: arch/arm/include/asm/arch-aspeed/ +F: board/aspeed/ +F: drivers/clk/aspeed/ +F: drivers/pinctrl/aspeed/ +N: aspeed + ARM BROADCOM BCM283X M: Matthias Brugger <mbrugger@suse.com> S: Maintained diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6b8a32c38d9..84018516668 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1749,6 +1749,14 @@ config TARGET_PRESIDIO_ASIC bool "Support Cortina Presidio ASIC Platform" select ARM64 +config TARGET_XENGUEST_ARM64 + bool "Xen guest ARM64" + select ARM64 + select XEN + select OF_CONTROL + select LINUX_KERNEL_IMAGE_HEADER + select XEN_SERIAL + select SSCANF endchoice config ARCH_SUPPORT_TFABOOT @@ -1955,6 +1963,7 @@ source "board/xilinx/Kconfig" source "board/xilinx/zynq/Kconfig" source "board/xilinx/zynqmp/Kconfig" source "board/phytium/durian/Kconfig" +source "board/xen/xenguest_arm64/Kconfig" source "arch/arm/Kconfig.debug" diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile index 7e33a183d54..93d26f98568 100644 --- a/arch/arm/cpu/armv8/Makefile +++ b/arch/arm/cpu/armv8/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_TARGET_HIKEY) += hisilicon/ obj-$(CONFIG_ARMV8_PSCI) += psci.o obj-$(CONFIG_ARCH_SUNXI) += lowlevel_init.o obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/ +obj-$(CONFIG_XEN) += xen/ diff --git a/arch/arm/cpu/armv8/xen/Makefile b/arch/arm/cpu/armv8/xen/Makefile new file mode 100644 index 00000000000..e3b4ae2bd40 --- /dev/null +++ b/arch/arm/cpu/armv8/xen/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# (C) 2018 NXP +# (C) 2020 EPAM Systems Inc. + +obj-y += lowlevel_init.o hypercall.o diff --git a/arch/arm/cpu/armv8/xen/hypercall.S b/arch/arm/cpu/armv8/xen/hypercall.S new file mode 100644 index 00000000000..731256b34e2 --- /dev/null +++ b/arch/arm/cpu/armv8/xen/hypercall.S @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * hypercall.S + * + * Xen hypercall wrappers + * + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * The Xen hypercall calling convention is very similar to the procedure + * call standard for the ARM 64-bit architecture: the first parameter is + * passed in x0, the second in x1, the third in x2, the fourth in x3 and + * the fifth in x4. + * + * The hypercall number is passed in x16. + * + * The return value is in x0. + * + * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM + * hypercall tag. + * + * Parameter structs passed to hypercalls are laid out according to + * the ARM 64-bit EABI standard. + */ + +#include <xen/interface/xen.h> + +#define XEN_HYPERCALL_TAG 0xEA1 + +#define HYPERCALL_SIMPLE(hypercall) \ +.globl HYPERVISOR_##hypercall; \ +.align 4,0x90; \ +HYPERVISOR_##hypercall: \ + mov x16, #__HYPERVISOR_##hypercall; \ + hvc XEN_HYPERCALL_TAG; \ + ret; \ + +#define HYPERCALL0 HYPERCALL_SIMPLE +#define HYPERCALL1 HYPERCALL_SIMPLE +#define HYPERCALL2 HYPERCALL_SIMPLE +#define HYPERCALL3 HYPERCALL_SIMPLE +#define HYPERCALL4 HYPERCALL_SIMPLE +#define HYPERCALL5 HYPERCALL_SIMPLE + + .text + +HYPERCALL2(xen_version); +HYPERCALL3(console_io); +HYPERCALL3(grant_table_op); +HYPERCALL2(sched_op); +HYPERCALL2(event_channel_op); +HYPERCALL2(hvm_op); +HYPERCALL2(memory_op); + diff --git a/arch/arm/cpu/armv8/xen/lowlevel_init.S b/arch/arm/cpu/armv8/xen/lowlevel_init.S new file mode 100644 index 00000000000..760e32ed761 --- /dev/null +++ b/arch/arm/cpu/armv8/xen/lowlevel_init.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0+ + * + * (C) 2017 NXP + * (C) 2020 EPAM Systems Inc. + */ + +#include <config.h> + +.align 8 +.global rom_pointer +rom_pointer: + .space 32 + +/* + * Routine: save_boot_params (called after reset from start.S) + */ + +.global save_boot_params +save_boot_params: + /* The firmware provided ATAG/FDT address can be found in r2/x0 */ + adr x1, rom_pointer + stp x0, x2, [x1], #16 + stp x3, x4, [x1], #16 + + /* Returns */ + b save_boot_params_ret + +.global restore_boot_params +restore_boot_params: + adr x1, rom_pointer + ldp x0, x2, [x1], #16 + ldp x3, x4, [x1], #16 + ret diff --git a/arch/arm/include/asm/arch-aspeed/platform.h b/arch/arm/include/asm/arch-aspeed/platform.h new file mode 100644 index 00000000000..6cee036f54c --- /dev/null +++ b/arch/arm/include/asm/arch-aspeed/platform.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) ASPEED Technology Inc. + * Ryan Chen <ryan_chen@aspeedtech.com> + * + */ + +#ifndef _ASM_ARCH_PLATFORM_H +#define _ASM_ARCH_PLATFORM_H + +#if defined(CONFIG_ASPEED_AST2500) +#define ASPEED_MAC_COUNT 2 +#define ASPEED_DRAM_BASE 0x80000000 +#define ASPEED_SRAM_BASE 0x1e720000 +#define ASPEED_SRAM_SIZE 0x9000 +#else +#err "Unrecognized Aspeed platform." +#endif + +#endif diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 8959749ad65..ade1401f3b4 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -110,9 +110,13 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen) * have some advantages to use them instead of the simple one here. */ #define mb() dsb() +#define rmb() dsb() +#define wmb() dsb() #define __iormb() dmb() #define __iowmb() dmb() +#define smp_processor_id() 0 + #define writeb(v,c) ({ u8 __v = v; __iowmb(); __arch_putb(__v,c); __v; }) #define writew(v,c) ({ u16 __v = v; __iowmb(); __arch_putw(__v,c); __v; }) #define writel(v,c) ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; }) diff --git a/arch/arm/include/asm/xen.h b/arch/arm/include/asm/xen.h new file mode 100644 index 00000000000..8e2ee3d64ea --- /dev/null +++ b/arch/arm/include/asm/xen.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0+ + * + * (C) 2020 EPAM Systems Inc. + */ + +extern unsigned long rom_pointer[]; + diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h new file mode 100644 index 00000000000..a4fd077079a --- /dev/null +++ b/arch/arm/include/asm/xen/hypercall.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012 + */ + +#ifndef _ASM_ARM_XEN_HYPERCALL_H +#define _ASM_ARM_XEN_HYPERCALL_H + +#include <xen/interface/xen.h> + +int HYPERVISOR_xen_version(int cmd, void *arg); +int HYPERVISOR_console_io(int cmd, int count, char *str); +int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); +int HYPERVISOR_sched_op(int cmd, void *arg); +int HYPERVISOR_event_channel_op(int cmd, void *arg); +unsigned long HYPERVISOR_hvm_op(int op, void *arg); +int HYPERVISOR_memory_op(unsigned int cmd, void *arg); +#endif /* _ASM_ARM_XEN_HYPERCALL_H */ diff --git a/arch/arm/include/asm/xen/system.h b/arch/arm/include/asm/xen/system.h new file mode 100644 index 00000000000..0fc8a7995ca --- /dev/null +++ b/arch/arm/include/asm/xen/system.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * (C) 2014 Karim Allah Ahmed <karim.allah.ahmed@gmail.com> + * (C) 2020, EPAM Systems Inc. + */ +#ifndef _ASM_ARM_XEN_SYSTEM_H +#define _ASM_ARM_XEN_SYSTEM_H + +#include <compiler.h> +#include <asm/bitops.h> + +/* If *ptr == old, then store new there (and return new). + * Otherwise, return the old value. + * Atomic. + */ +#define synch_cmpxchg(ptr, old, new) \ +({ __typeof__(*ptr) stored = old; \ + __atomic_compare_exchange_n(ptr, &stored, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? new : old; \ +}) + +/* As test_and_clear_bit, but using __ATOMIC_SEQ_CST */ +static inline int synch_test_and_clear_bit(int nr, volatile void *addr) +{ + u8 *byte = ((u8 *)addr) + (nr >> 3); + u8 bit = 1 << (nr & 7); + u8 orig; + + orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As test_and_set_bit, but using __ATOMIC_SEQ_CST */ +static inline int synch_test_and_set_bit(int nr, volatile void *base) +{ + u8 *byte = ((u8 *)base) + (nr >> 3); + u8 bit = 1 << (nr & 7); + u8 orig; + + orig = __atomic_fetch_or(byte, bit, __ATOMIC_SEQ_CST); + + return (orig & bit) != 0; +} + +/* As set_bit, but using __ATOMIC_SEQ_CST */ +static inline void synch_set_bit(int nr, volatile void *addr) +{ + synch_test_and_set_bit(nr, addr); +} + +/* As clear_bit, but using __ATOMIC_SEQ_CST */ +static inline void synch_clear_bit(int nr, volatile void *addr) +{ + synch_test_and_clear_bit(nr, addr); +} + +/* As test_bit, but with a following memory barrier. */ +//static inline int synch_test_bit(int nr, volatile void *addr) +static inline int synch_test_bit(int nr, const void *addr) +{ + int result; + + result = test_bit(nr, addr); + barrier(); + return result; +} + +#define xchg(ptr, v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST) +#define xchg(ptr, v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST) + +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() + +#define to_phys(x) ((unsigned long)(x)) +#define to_virt(x) ((void *)(x)) + +#define PFN_UP(x) (unsigned long)(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) +#define PFN_DOWN(x) (unsigned long)((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((unsigned long)(x) << PAGE_SHIFT) +#define PHYS_PFN(x) (unsigned long)((x) >> PAGE_SHIFT) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(PFN_PHYS(_mfn))) +#define pfn_to_virt(_pfn) (to_virt(PFN_PHYS(_pfn))) + +#endif diff --git a/arch/arm/mach-aspeed/Makefile b/arch/arm/mach-aspeed/Makefile index 1557dcae66d..33f65b50b28 100644 --- a/arch/arm/mach-aspeed/Makefile +++ b/arch/arm/mach-aspeed/Makefile @@ -3,4 +3,4 @@ # Copyright (c) 2016 Google, Inc obj-$(CONFIG_ARCH_ASPEED) += ast_wdt.o -obj-$(CONFIG_ASPEED_AST2500) += ast2500/ ast2500-board.o +obj-$(CONFIG_ASPEED_AST2500) += ast2500/ diff --git a/arch/arm/mach-aspeed/ast2500/Makefile b/arch/arm/mach-aspeed/ast2500/Makefile index a35b239ef35..4c27c8fc465 100644 --- a/arch/arm/mach-aspeed/ast2500/Makefile +++ b/arch/arm/mach-aspeed/ast2500/Makefile @@ -1 +1,3 @@ +obj-y += lowlevel_init.o +obj-y += board_common.o obj-y += clk_ast2500.o sdram_ast2500.o diff --git a/arch/arm/mach-aspeed/ast2500-board.c b/arch/arm/mach-aspeed/ast2500/board_common.c index f74dcbbb624..3482ee91efd 100644 --- a/arch/arm/mach-aspeed/ast2500-board.c +++ b/arch/arm/mach-aspeed/ast2500/board_common.c @@ -28,31 +28,6 @@ DECLARE_GLOBAL_DATA_PTR; -void lowlevel_init(void) -{ - /* - * These two watchdogs need to be stopped as soon as possible, - * otherwise the board might hang. By default they are set to - * a very short timeout and even simple debug write to serial - * console early in the init process might cause them to fire. - */ - struct ast_wdt *flash_addr_wdt = - (struct ast_wdt *)(WDT_BASE + - sizeof(struct ast_wdt) * - AST_FLASH_ADDR_DETECT_WDT); - - clrbits_le32(&flash_addr_wdt->ctrl, WDT_CTRL_EN); - -#ifndef CONFIG_FIRMWARE_2ND_BOOT - struct ast_wdt *sec_boot_wdt = - (struct ast_wdt *)(WDT_BASE + - sizeof(struct ast_wdt) * - AST_2ND_BOOT_WDT); - - clrbits_le32(&sec_boot_wdt->ctrl, WDT_CTRL_EN); -#endif -} - int board_init(void) { gd->bd->bi_boot_params = CONFIG_SYS_SDRAM_BASE + 0x100; diff --git a/arch/arm/mach-aspeed/ast2500/lowlevel_init.S b/arch/arm/mach-aspeed/ast2500/lowlevel_init.S new file mode 100644 index 00000000000..9ec3dd46b70 --- /dev/null +++ b/arch/arm/mach-aspeed/ast2500/lowlevel_init.S @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) ASPEED Technology Inc. + */ +#include <asm/arch/scu_ast2500.h> + +/* registers for low level init */ +#define SCU_PROT_KEY 0x1e6e2000 +#define SCU_VGA_HANDSHAKE 0x1e6e2040 +#define SCU_HW_STRAP 0x1e6e2070 +#define SCU_HW_STRAP_CLR 0x1e6e207c +#define WDT3_CTRL 0x1e78504c + +.global lowlevel_init +lowlevel_init: + + /* unlock SCU */ + ldr r0, =SCU_PROT_KEY + ldr r1, =SCU_UNLOCK_VALUE + str r1, [r0] + + /* set BMC FW as DRAM initializer */ + ldr r0, =SCU_VGA_HANDSHAKE + ldr r1, [r0] + orr r1, #0x80 + str r1, [r0] + + /* set PERST# as LPC reset source if eSPI mode is enabled*/ + ldr r0, =SCU_HW_STRAP + ldr r1, [r0] + tst r1, #(0x1 << 25) + ldrne r0, =SCU_HW_STRAP_CLR + movne r1, #(0x1 << 14) + strne r1, [r0] + + /* disable WDT3 for SPI 3/4 bytes auto-detection */ + ldr r0, =WDT3_CTRL + mov r1, #0x0 + str r1, [r0] + + mov pc, lr diff --git a/board/xen/xenguest_arm64/Kconfig b/board/xen/xenguest_arm64/Kconfig new file mode 100644 index 00000000000..cc131ed5b96 --- /dev/null +++ b/board/xen/xenguest_arm64/Kconfig @@ -0,0 +1,12 @@ +if TARGET_XENGUEST_ARM64 + +config SYS_BOARD + default "xenguest_arm64" + +config SYS_VENDOR + default "xen" + +config SYS_CONFIG_NAME + default "xenguest_arm64" + +endif diff --git a/board/xen/xenguest_arm64/MAINTAINERS b/board/xen/xenguest_arm64/MAINTAINERS new file mode 100644 index 00000000000..787e9e0d0e1 --- /dev/null +++ b/board/xen/xenguest_arm64/MAINTAINERS @@ -0,0 +1,7 @@ +XEN GUEST FOR ARM64 +M: Andrii Anisov <andrii_anisov@epam.com> +S: Maintained +F: board/xen/xenguest_arm64/ +F: doc/board/xen/ +F: include/configs/xenguest_arm64.h +F: configs/xenguest_arm64_defconfig diff --git a/board/xen/xenguest_arm64/Makefile b/board/xen/xenguest_arm64/Makefile new file mode 100644 index 00000000000..1cf87a728f1 --- /dev/null +++ b/board/xen/xenguest_arm64/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# (C) Copyright 2020 EPAM Systems Inc. + +obj-y := xenguest_arm64.o diff --git a/board/xen/xenguest_arm64/xenguest_arm64.c b/board/xen/xenguest_arm64/xenguest_arm64.c new file mode 100644 index 00000000000..cce54369bb3 --- /dev/null +++ b/board/xen/xenguest_arm64/xenguest_arm64.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) 2013 + * David Feng <fenghua@phytium.com.cn> + * Sharma Bhupesh <bhupesh.sharma@freescale.com> + * + * (C) 2020 EPAM Systems Inc + */ + +#include <common.h> +#include <cpu_func.h> +#include <dm.h> +#include <errno.h> +#include <malloc.h> +#include <xen.h> + +#include <asm/io.h> +#include <asm/armv8/mmu.h> +#include <asm/xen.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/system.h> + +#include <linux/compiler.h> + +#include <xen/gnttab.h> +#include <xen/hvm.h> + +DECLARE_GLOBAL_DATA_PTR; + +int board_init(void) +{ + return 0; +} + +/* + * Use fdt provided by Xen: according to + * https://www.kernel.org/doc/Documentation/arm64/booting.txt + * x0 is the physical address of the device tree blob (dtb) in system RAM. + * This is stored in rom_pointer during low level init. + */ +void *board_fdt_blob_setup(void) +{ + if (fdt_magic(rom_pointer[0]) != FDT_MAGIC) + return NULL; + return (void *)rom_pointer[0]; +} + +#define MAX_MEM_MAP_REGIONS 5 +static struct mm_region xen_mem_map[MAX_MEM_MAP_REGIONS]; +struct mm_region *mem_map = xen_mem_map; + +static int get_next_memory_node(const void *blob, int mem) +{ + do { + mem = fdt_node_offset_by_prop_value(blob, mem, + "device_type", "memory", 7); + } while (!fdtdec_get_is_enabled(blob, mem)); + + return mem; +} + +static int setup_mem_map(void) +{ + int i = 0, ret, mem, reg = 0; + struct fdt_resource res; + const void *blob = gd->fdt_blob; + u64 gfn; + phys_addr_t gnttab_base; + phys_size_t gnttab_sz; + + /* + * Add "magic" region which is used by Xen to provide some essentials + * for the guest: we need console and xenstore. + */ + ret = hvm_get_parameter_maintain_dcache(HVM_PARAM_CONSOLE_PFN, &gfn); + if (ret < 0) { + printf("%s: Can't get HVM_PARAM_CONSOLE_PFN, ret %d\n", + __func__, ret); + return -EINVAL; + } + + xen_mem_map[i].virt = PFN_PHYS(gfn); + xen_mem_map[i].phys = PFN_PHYS(gfn); + xen_mem_map[i].size = PAGE_SIZE; + xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) | + PTE_BLOCK_INNER_SHARE); + i++; + + ret = hvm_get_parameter_maintain_dcache(HVM_PARAM_STORE_PFN, &gfn); + if (ret < 0) { + printf("%s: Can't get HVM_PARAM_STORE_PFN, ret %d\n", + __func__, ret); + return -EINVAL; + } + + xen_mem_map[i].virt = PFN_PHYS(gfn); + xen_mem_map[i].phys = PFN_PHYS(gfn); + xen_mem_map[i].size = PAGE_SIZE; + xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) | + PTE_BLOCK_INNER_SHARE); + i++; + + /* Get Xen's suggested physical page assignments for the grant table. */ + get_gnttab_base(&gnttab_base, &gnttab_sz); + + xen_mem_map[i].virt = gnttab_base; + xen_mem_map[i].phys = gnttab_base; + xen_mem_map[i].size = gnttab_sz; + xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) | + PTE_BLOCK_INNER_SHARE); + i++; + + mem = get_next_memory_node(blob, -1); + if (mem < 0) { + printf("%s: Missing /memory node\n", __func__); + return -EINVAL; + } + + for (; i < MAX_MEM_MAP_REGIONS; i++) { + ret = fdt_get_resource(blob, mem, "reg", reg++, &res); + if (ret == -FDT_ERR_NOTFOUND) { + reg = 0; + mem = get_next_memory_node(blob, mem); + if (mem == -FDT_ERR_NOTFOUND) + break; + + ret = fdt_get_resource(blob, mem, "reg", reg++, &res); + if (ret == -FDT_ERR_NOTFOUND) + break; + } + if (ret != 0) { + printf("No reg property for memory node\n"); + return -EINVAL; + } + + xen_mem_map[i].virt = (phys_addr_t)res.start; + xen_mem_map[i].phys = (phys_addr_t)res.start; + xen_mem_map[i].size = (phys_size_t)(res.end - res.start + 1); + xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) | + PTE_BLOCK_INNER_SHARE); + } + return 0; +} + +void enable_caches(void) +{ + /* Re-setup the memory map as BSS gets cleared after relocation. */ + setup_mem_map(); + icache_enable(); + dcache_enable(); +} + +/* Read memory settings from the Xen provided device tree. */ +int dram_init(void) +{ + int ret; + + ret = fdtdec_setup_mem_size_base(); + if (ret < 0) + return ret; + /* Setup memory map, so MMU page table size can be estimated. */ + return setup_mem_map(); +} + +int dram_init_banksize(void) +{ + return fdtdec_setup_memory_banksize(); +} + +/* + * Board specific reset that is system reset. + */ +void reset_cpu(ulong addr) +{ +} + +int ft_system_setup(void *blob, struct bd_info *bd) +{ + return 0; +} + +int ft_board_setup(void *blob, struct bd_info *bd) +{ + return 0; +} + +int board_early_init_f(void) +{ + return 0; +} + +int print_cpuinfo(void) +{ + printf("Xen virtual CPU\n"); + return 0; +} + +void board_cleanup_before_linux(void) +{ + xen_fini(); +} + diff --git a/cmd/Kconfig b/cmd/Kconfig index 23d7e27dc8d..9ad511aa176 100644 --- a/cmd/Kconfig +++ b/cmd/Kconfig @@ -1370,6 +1370,13 @@ config CMD_USB_MASS_STORAGE help USB mass storage support +config CMD_PVBLOCK + bool "Xen para-virtualized block device" + depends on XEN + select PVBLOCK + help + Xen para-virtualized block device support + config CMD_VIRTIO bool "virtio" depends on VIRTIO diff --git a/cmd/Makefile b/cmd/Makefile index ef2a22f9b12..3a9c9747c94 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -174,6 +174,7 @@ obj-$(CONFIG_CMD_DFU) += dfu.o obj-$(CONFIG_CMD_GPT) += gpt.o obj-$(CONFIG_CMD_ETHSW) += ethsw.o obj-$(CONFIG_CMD_AXI) += axi.o +obj-$(CONFIG_CMD_PVBLOCK) += pvblock.o # Power obj-$(CONFIG_CMD_PMIC) += pmic.o diff --git a/cmd/demo.c b/cmd/demo.c index f923533f794..7310aa2907a 100644 --- a/cmd/demo.c +++ b/cmd/demo.c @@ -130,5 +130,4 @@ U_BOOT_CMD( "demo hello <num> [<char>] Say hello\n" "demo light [<num>] Set or get the lights\n" "demo status <num> Get demo device status\n" - "demo list List available demo devices" ); diff --git a/cmd/pvblock.c b/cmd/pvblock.c new file mode 100644 index 00000000000..4e99b06122b --- /dev/null +++ b/cmd/pvblock.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * (C) Copyright 2020 EPAM Systems Inc. + * + * XEN para-virtualized block device support + */ + +#include <blk.h> +#include <common.h> +#include <command.h> + +/* Current I/O Device */ +static int pvblock_curr_device; + +int do_pvblock(struct cmd_tbl *cmdtp, int flag, int argc, char *const argv[]) +{ + return blk_common_cmd(argc, argv, IF_TYPE_PVBLOCK, + &pvblock_curr_device); +} + +U_BOOT_CMD(pvblock, 5, 1, do_pvblock, + "Xen para-virtualized block device", + "info - show available block devices\n" + "pvblock device [dev] - show or set current device\n" + "pvblock part [dev] - print partition table of one or all devices\n" + "pvblock read addr blk# cnt\n" + "pvblock write addr blk# cnt - read/write `cnt'" + " blocks starting at block `blk#'\n" + " to/from memory address `addr'"); + diff --git a/common/board_r.c b/common/board_r.c index d48d2bb8a04..9b2fec701a5 100644 --- a/common/board_r.c +++ b/common/board_r.c @@ -49,6 +49,7 @@ #include <nand.h> #include <of_live.h> #include <onenand_uboot.h> +#include <pvblock.h> #include <scsi.h> #include <serial.h> #include <status_led.h> @@ -56,6 +57,9 @@ #include <timer.h> #include <trace.h> #include <watchdog.h> +#ifdef CONFIG_XEN +#include <xen.h> +#endif #ifdef CONFIG_ADDR_MAP #include <asm/mmu.h> #endif @@ -465,6 +469,23 @@ static int initr_mmc(void) } #endif +#ifdef CONFIG_XEN +static int initr_xen(void) +{ + xen_init(); + return 0; +} +#endif + +#ifdef CONFIG_PVBLOCK +static int initr_pvblock(void) +{ + puts("PVBLOCK: "); + pvblock_init(); + return 0; +} +#endif + /* * Tell if it's OK to load the environment early in boot. * @@ -762,6 +783,12 @@ static init_fnc_t init_sequence_r[] = { #ifdef CONFIG_MMC initr_mmc, #endif +#ifdef CONFIG_XEN + initr_xen, +#endif +#ifdef CONFIG_PVBLOCK + initr_pvblock, +#endif initr_env, #ifdef CONFIG_SYS_BOOTPARAMS_LEN initr_malloc_bootparams, diff --git a/configs/evb-ast2500_defconfig b/configs/evb-ast2500_defconfig index 825fa4dae1d..105975c9e6c 100644 --- a/configs/evb-ast2500_defconfig +++ b/configs/evb-ast2500_defconfig @@ -11,6 +11,8 @@ CONFIG_PRE_CON_BUF_ADDR=0x1e720000 CONFIG_DEFAULT_DEVICE_TREE="ast2500-evb" CONFIG_USE_BOOTARGS=y CONFIG_BOOTARGS="console=ttyS4,115200n8 root=/dev/ram rw" +CONFIG_USE_BOOTCOMMAND=y +CONFIG_BOOTCOMMAND="bootm 20080000 20300000" CONFIG_PRE_CONSOLE_BUFFER=y # CONFIG_DISPLAY_CPUINFO is not set CONFIG_HUSH_PARSER=y @@ -20,7 +22,6 @@ CONFIG_CMD_MMC=y CONFIG_CMD_DHCP=y CONFIG_CMD_MII=y CONFIG_CMD_PING=y -CONFIG_OF_EMBED=y CONFIG_ENV_OVERWRITE=y CONFIG_SYS_RELOC_GD_ENV_ADDR=y CONFIG_NET_RANDOM_ETHADDR=y diff --git a/configs/xenguest_arm64_defconfig b/configs/xenguest_arm64_defconfig new file mode 100644 index 00000000000..46473c251da --- /dev/null +++ b/configs/xenguest_arm64_defconfig @@ -0,0 +1,60 @@ +CONFIG_ARM=y +CONFIG_POSITION_INDEPENDENT=y +CONFIG_SYS_TEXT_BASE=0x40080000 +CONFIG_SYS_MALLOC_F_LEN=0x2000 +CONFIG_IDENT_STRING=" xenguest" +CONFIG_TARGET_XENGUEST_ARM64=y +CONFIG_BOOTDELAY=10 + +CONFIG_SYS_PROMPT="xenguest# " + +CONFIG_CMD_NET=n +CONFIG_CMD_BDI=n +CONFIG_CMD_BOOTD=n +CONFIG_CMD_BOOTEFI=n +CONFIG_CMD_BOOTEFI_HELLO_COMPILE=n +CONFIG_CMD_ELF=n +CONFIG_CMD_EXT4=y +CONFIG_CMD_FAT=y +CONFIG_CMD_GO=n +CONFIG_CMD_RUN=n +CONFIG_CMD_IMI=n +CONFIG_CMD_IMLS=n +CONFIG_CMD_XIMG=n +CONFIG_CMD_EXPORTENV=n +CONFIG_CMD_IMPORTENV=n +CONFIG_CMD_EDITENV=n +CONFIG_CMD_ENV_EXISTS=n +CONFIG_CMD_MEMORY=y +CONFIG_CMD_CRC32=n +CONFIG_CMD_DM=n +CONFIG_CMD_LOADB=n +CONFIG_CMD_LOADS=n +CONFIG_CMD_FLASH=n +CONFIG_CMD_GPT=n +CONFIG_CMD_FPGA=n +CONFIG_CMD_ECHO=n +CONFIG_CMD_ITEST=n +CONFIG_CMD_SOURCE=n +CONFIG_CMD_SETEXPR=n +CONFIG_CMD_MISC=n +CONFIG_CMD_UNZIP=n +CONFIG_CMD_LZMADEC=n +CONFIG_CMD_SAVEENV=n +CONFIG_CMD_UMS=n + +CONFIG_CMD_PVBLOCK=y + +#CONFIG_USB=n +# CONFIG_ISO_PARTITION is not set + +#CONFIG_EFI_PARTITION=y +# CONFIG_EFI_LOADER is not set + +CONFIG_DM=y +# CONFIG_MMC is not set +CONFIG_DM_SERIAL=y +# CONFIG_REQUIRE_SERIAL_CONSOLE is not set + +CONFIG_OF_BOARD=y +CONFIG_OF_LIBFDT=y diff --git a/disk/part.c b/disk/part.c index f6a31025dc8..b69fd345f36 100644 --- a/disk/part.c +++ b/disk/part.c @@ -149,6 +149,7 @@ void dev_print (struct blk_desc *dev_desc) case IF_TYPE_MMC: case IF_TYPE_USB: case IF_TYPE_NVME: + case IF_TYPE_PVBLOCK: printf ("Vendor: %s Rev: %s Prod: %s\n", dev_desc->vendor, dev_desc->revision, @@ -288,6 +289,9 @@ static void print_part_header(const char *type, struct blk_desc *dev_desc) case IF_TYPE_NVME: puts ("NVMe"); break; + case IF_TYPE_PVBLOCK: + puts("PV BLOCK"); + break; case IF_TYPE_VIRTIO: puts("VirtIO"); break; diff --git a/doc/board/index.rst b/doc/board/index.rst index 0a15899180f..63935abcd79 100644 --- a/doc/board/index.rst +++ b/doc/board/index.rst @@ -22,4 +22,5 @@ Board-specific doc st/index tbs/index toradex/index + xen/index xilinx/index diff --git a/doc/board/xen/index.rst b/doc/board/xen/index.rst new file mode 100644 index 00000000000..e58fe9e3512 --- /dev/null +++ b/doc/board/xen/index.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +XenGuestARM64 +============= + +.. toctree:: + :maxdepth: 2 + + xenguest_arm64 diff --git a/doc/board/xen/xenguest_arm64.rst b/doc/board/xen/xenguest_arm64.rst new file mode 100644 index 00000000000..1327f88f990 --- /dev/null +++ b/doc/board/xen/xenguest_arm64.rst @@ -0,0 +1,81 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Xen guest ARM64 board +===================== + +This board specification +------------------------ + +This board is to be run as a virtual Xen [1] guest with U-boot as its primary +bootloader. Xen is a type 1 hypervisor that allows multiple operating systems +to run simultaneously on a single physical server. Xen is capable of running +virtual machines in both full virtualization and para-virtualization (PV) +modes. Xen runs virtual machines, which are called “domains”. + +Paravirtualized drivers are a special type of device drivers that are used in +a guest system in the Xen domain and perform I/O operations using a special +interface provided by the virtualization system and the host system. + +Xen support for U-boot is implemented by introducing a new Xen guest ARM64 +board and porting essential drivers from MiniOS [3] as well as some of the work +previously done by NXP [4]: + +- PV block device frontend driver with XenStore based device enumeration and + UCLASS_PVBLOCK class; +- PV serial console device frontend driver; +- Xen hypervisor support with minimal set of the essential headers adapted from + the Linux kernel; +- Xen grant table support; +- Xen event channel support in polling mode; +- XenBus support; +- dynamic RAM size as defined in the device tree instead of the statically + defined values; +- position-independent pre-relocation code is used as we cannot statically + define any start addresses at compile time which is up to Xen to choose at + run-time; +- new defconfig introduced: xenguest_arm64_defconfig. + + +Board limitations +----------------- + +1. U-boot runs without MMU enabled at the early stages. + According to Xen on ARM ABI (xen/include/public/arch-arm.h): all memory + which is shared with other entities in the system (including the hypervisor + and other guests) must reside in memory which is mapped as Normal Inner + Write-Back Outer Write-Back Inner-Shareable. + Thus, page attributes must be equally set for all the entities working with + that page. + Before MMU is set up the data cache is turned off and pages are seen by the + vCPU and Xen in different ways - cacheable by Xen and non-cacheable by vCPU. + So it means that manual data cache maintenance is required at the early + stages. + +2. No serial console until MMU is up. + Because data cache maintenance is required until the MMU setup the + early/debug serial console is not implemented. Therefore, we do not have + usual prints like U-boot’s banner etc. until the serial driver is + initialized. + +3. Single RAM bank supported. + If a Xen guest is given much memory it is possible that Xen allocates two + memory banks for it. The first one is allocated under 4GB address space and + in some cases may represent the whole guest’s memory. It is assumed that + U-boot most likely won’t require high memory bank for its work andlaunching + OS, so it is enough to take the first one. + + +Board default configuration +--------------------------- + +One can select the configuration as follows: + + - make xenguest_arm64_defconfig + +[1] - https://xenproject.org/ + +[2] - https://wiki.xenproject.org/wiki/Paravirtualization_(PV) + +[3] - https://wiki.xenproject.org/wiki/Mini-OS + +[4] - https://source.codeaurora.org/external/imx/uboot-imx/tree/?h=imx_v2018.03_4.14.98_2.0.0_ga diff --git a/drivers/Kconfig b/drivers/Kconfig index 119e412849f..613669cb381 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -136,6 +136,8 @@ source "drivers/w1-eeprom/Kconfig" source "drivers/watchdog/Kconfig" +source "drivers/xen/Kconfig" + config PHYS_TO_BUS bool "Custom physical to bus address mapping" help diff --git a/drivers/Makefile b/drivers/Makefile index 2178871bfb5..33126b2da7b 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_$(SPL_)REMOTEPROC) += remoteproc/ obj-$(CONFIG_$(SPL_TPL_)TPM) += tpm/ obj-$(CONFIG_$(SPL_TPL_)ACPI_PMC) += power/acpi_pmc/ obj-$(CONFIG_$(SPL_)BOARD) += board/ +obj-$(CONFIG_XEN) += xen/ ifndef CONFIG_TPL_BUILD ifdef CONFIG_SPL_BUILD diff --git a/drivers/block/blk-uclass.c b/drivers/block/blk-uclass.c index b46a1ac8d21..2fb9f6b765e 100644 --- a/drivers/block/blk-uclass.c +++ b/drivers/block/blk-uclass.c @@ -28,6 +28,7 @@ static const char *if_typename_str[IF_TYPE_COUNT] = { [IF_TYPE_NVME] = "nvme", [IF_TYPE_EFI] = "efi", [IF_TYPE_VIRTIO] = "virtio", + [IF_TYPE_PVBLOCK] = "pvblock", }; static enum uclass_id if_type_uclass_id[IF_TYPE_COUNT] = { @@ -43,6 +44,7 @@ static enum uclass_id if_type_uclass_id[IF_TYPE_COUNT] = { [IF_TYPE_NVME] = UCLASS_NVME, [IF_TYPE_EFI] = UCLASS_EFI, [IF_TYPE_VIRTIO] = UCLASS_VIRTIO, + [IF_TYPE_PVBLOCK] = UCLASS_PVBLOCK, }; static enum if_type if_typename_to_iftype(const char *if_typename) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 91065e67f1b..5e0a39396bb 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -191,6 +191,13 @@ config PCIE_INTEL_FPGA Say Y here if you want to enable PCIe controller support on Intel FPGA, example Stratix 10. +config PCIE_IPROC + bool "Iproc PCIe support" + depends on DM_PCI + help + Broadcom iProc PCIe controller driver. + Say Y here if you want to enable Broadcom iProc PCIe controller, + config PCI_MVEBU bool "Enable Armada XP/38x PCIe driver" depends on ARCH_MVEBU diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 9faebffa488..9db90fb53c5 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_SH4_PCI) += pci_sh4.o obj-$(CONFIG_SH7751_PCI) +=pci_sh7751.o obj-$(CONFIG_SH7780_PCI) +=pci_sh7780.o obj-$(CONFIG_PCI_TEGRA) += pci_tegra.o +obj-$(CONFIG_PCIE_IPROC) += pcie_iproc.o obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o obj-$(CONFIG_PCIE_DW_MVEBU) += pcie_dw_mvebu.o obj-$(CONFIG_PCIE_FSL) += pcie_fsl.o pcie_fsl_fixup.o diff --git a/drivers/pci/pci-uclass.c b/drivers/pci/pci-uclass.c index 834526c5a47..40cc9f1090e 100644 --- a/drivers/pci/pci-uclass.c +++ b/drivers/pci/pci-uclass.c @@ -1179,6 +1179,48 @@ ulong pci_conv_size_to_32(ulong old, ulong value, uint offset, return value; } +int pci_get_dma_regions(struct udevice *dev, struct pci_region *memp, int index) +{ + int pci_addr_cells, addr_cells, size_cells; + int cells_per_record; + const u32 *prop; + int len; + int i = 0; + + prop = ofnode_get_property(dev_ofnode(dev), "dma-ranges", &len); + if (!prop) { + log_err("PCI: Device '%s': Cannot decode dma-ranges\n", + dev->name); + return -EINVAL; + } + + pci_addr_cells = ofnode_read_simple_addr_cells(dev_ofnode(dev)); + addr_cells = ofnode_read_simple_addr_cells(dev_ofnode(dev->parent)); + size_cells = ofnode_read_simple_size_cells(dev_ofnode(dev)); + + /* PCI addresses are always 3-cells */ + len /= sizeof(u32); + cells_per_record = pci_addr_cells + addr_cells + size_cells; + debug("%s: len=%d, cells_per_record=%d\n", __func__, len, + cells_per_record); + + while (len) { + memp->bus_start = fdtdec_get_number(prop + 1, 2); + prop += pci_addr_cells; + memp->phys_start = fdtdec_get_number(prop, addr_cells); + prop += addr_cells; + memp->size = fdtdec_get_number(prop, size_cells); + prop += size_cells; + + if (i == index) + return 0; + i++; + len -= cells_per_record; + } + + return -EINVAL; +} + int pci_get_regions(struct udevice *dev, struct pci_region **iop, struct pci_region **memp, struct pci_region **prefp) { diff --git a/drivers/pci/pcie_iproc.c b/drivers/pci/pcie_iproc.c new file mode 100644 index 00000000000..d77735fcf26 --- /dev/null +++ b/drivers/pci/pcie_iproc.c @@ -0,0 +1,1287 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Broadcom + * + */ + +#include <common.h> +#include <dm.h> +#include <errno.h> +#include <generic-phy.h> +#include <pci.h> +#include <malloc.h> +#include <asm/io.h> +#include <dm/device_compat.h> +#include <linux/log2.h> + +#define EP_PERST_SOURCE_SELECT_SHIFT 2 +#define EP_PERST_SOURCE_SELECT BIT(EP_PERST_SOURCE_SELECT_SHIFT) +#define EP_MODE_SURVIVE_PERST_SHIFT 1 +#define EP_MODE_SURVIVE_PERST BIT(EP_MODE_SURVIVE_PERST_SHIFT) +#define RC_PCIE_RST_OUTPUT_SHIFT 0 +#define RC_PCIE_RST_OUTPUT BIT(RC_PCIE_RST_OUTPUT_SHIFT) + +#define CFG_IND_ADDR_MASK 0x00001ffc + +#define CFG_ADDR_BUS_NUM_SHIFT 20 +#define CFG_ADDR_BUS_NUM_MASK 0x0ff00000 +#define CFG_ADDR_DEV_NUM_SHIFT 15 +#define CFG_ADDR_DEV_NUM_MASK 0x000f8000 +#define CFG_ADDR_FUNC_NUM_SHIFT 12 +#define CFG_ADDR_FUNC_NUM_MASK 0x00007000 +#define CFG_ADDR_REG_NUM_SHIFT 2 +#define CFG_ADDR_REG_NUM_MASK 0x00000ffc +#define CFG_ADDR_CFG_TYPE_SHIFT 0 +#define CFG_ADDR_CFG_TYPE_MASK 0x00000003 + +#define IPROC_PCI_PM_CAP 0x48 +#define IPROC_PCI_PM_CAP_MASK 0xffff +#define IPROC_PCI_EXP_CAP 0xac + +#define IPROC_PCIE_REG_INVALID 0xffff + +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +/* CRS Software Visibility capability */ +#define PCI_EXP_RTCAP_CRSVIS 0x0001 + +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ + +#define PCIE_PHYLINKUP_SHIFT 3 +#define PCIE_PHYLINKUP BIT(PCIE_PHYLINKUP_SHIFT) +#define PCIE_DL_ACTIVE_SHIFT 2 +#define PCIE_DL_ACTIVE BIT(PCIE_DL_ACTIVE_SHIFT) + +/* derive the enum index of the outbound/inbound mapping registers */ +#define MAP_REG(base_reg, index) ((base_reg) + (index) * 2) + +/* + * Maximum number of outbound mapping window sizes that can be supported by any + * OARR/OMAP mapping pair + */ +#define MAX_NUM_OB_WINDOW_SIZES 4 + +#define OARR_VALID_SHIFT 0 +#define OARR_VALID BIT(OARR_VALID_SHIFT) +#define OARR_SIZE_CFG_SHIFT 1 + +/* + * Maximum number of inbound mapping region sizes that can be supported by an + * IARR + */ +#define MAX_NUM_IB_REGION_SIZES 9 + +#define IMAP_VALID_SHIFT 0 +#define IMAP_VALID BIT(IMAP_VALID_SHIFT) + +#define APB_ERR_EN_SHIFT 0 +#define APB_ERR_EN BIT(APB_ERR_EN_SHIFT) + +/** + * iProc PCIe host registers + */ +enum iproc_pcie_reg { + /* clock/reset signal control */ + IPROC_PCIE_CLK_CTRL = 0, + + /* + * To allow MSI to be steered to an external MSI controller (e.g., ARM + * GICv3 ITS) + */ + IPROC_PCIE_MSI_GIC_MODE, + + /* + * IPROC_PCIE_MSI_BASE_ADDR and IPROC_PCIE_MSI_WINDOW_SIZE define the + * window where the MSI posted writes are written, for the writes to be + * interpreted as MSI writes. + */ + IPROC_PCIE_MSI_BASE_ADDR, + IPROC_PCIE_MSI_WINDOW_SIZE, + + /* + * To hold the address of the register where the MSI writes are + * programed. When ARM GICv3 ITS is used, this should be programmed + * with the address of the GITS_TRANSLATER register. + */ + IPROC_PCIE_MSI_ADDR_LO, + IPROC_PCIE_MSI_ADDR_HI, + + /* enable MSI */ + IPROC_PCIE_MSI_EN_CFG, + + /* allow access to root complex configuration space */ + IPROC_PCIE_CFG_IND_ADDR, + IPROC_PCIE_CFG_IND_DATA, + + /* allow access to device configuration space */ + IPROC_PCIE_CFG_ADDR, + IPROC_PCIE_CFG_DATA, + + /* enable INTx */ + IPROC_PCIE_INTX_EN, + IPROC_PCIE_INTX_CSR, + + /* outbound address mapping */ + IPROC_PCIE_OARR0, + IPROC_PCIE_OMAP0, + IPROC_PCIE_OARR1, + IPROC_PCIE_OMAP1, + IPROC_PCIE_OARR2, + IPROC_PCIE_OMAP2, + IPROC_PCIE_OARR3, + IPROC_PCIE_OMAP3, + + /* inbound address mapping */ + IPROC_PCIE_IARR0, + IPROC_PCIE_IMAP0, + IPROC_PCIE_IARR1, + IPROC_PCIE_IMAP1, + IPROC_PCIE_IARR2, + IPROC_PCIE_IMAP2, + IPROC_PCIE_IARR3, + IPROC_PCIE_IMAP3, + IPROC_PCIE_IARR4, + IPROC_PCIE_IMAP4, + + /* config read status */ + IPROC_PCIE_CFG_RD_STATUS, + + /* link status */ + IPROC_PCIE_LINK_STATUS, + + /* enable APB error for unsupported requests */ + IPROC_PCIE_APB_ERR_EN, + + /* Ordering Mode configuration registers */ + IPROC_PCIE_ORDERING_CFG, + IPROC_PCIE_IMAP0_RO_CONTROL, + IPROC_PCIE_IMAP1_RO_CONTROL, + IPROC_PCIE_IMAP2_RO_CONTROL, + IPROC_PCIE_IMAP3_RO_CONTROL, + IPROC_PCIE_IMAP4_RO_CONTROL, + + /* total number of core registers */ + IPROC_PCIE_MAX_NUM_REG, +}; + +/* iProc PCIe PAXB v2 registers */ +static const u16 iproc_pcie_reg_paxb_v2[] = { + [IPROC_PCIE_CLK_CTRL] = 0x000, + [IPROC_PCIE_CFG_IND_ADDR] = 0x120, + [IPROC_PCIE_CFG_IND_DATA] = 0x124, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, + [IPROC_PCIE_INTX_EN] = 0x330, + [IPROC_PCIE_INTX_CSR] = 0x334, + [IPROC_PCIE_OARR0] = 0xd20, + [IPROC_PCIE_OMAP0] = 0xd40, + [IPROC_PCIE_OARR1] = 0xd28, + [IPROC_PCIE_OMAP1] = 0xd48, + [IPROC_PCIE_OARR2] = 0xd60, + [IPROC_PCIE_OMAP2] = 0xd68, + [IPROC_PCIE_OARR3] = 0xdf0, + [IPROC_PCIE_OMAP3] = 0xdf8, + [IPROC_PCIE_IARR0] = 0xd00, + [IPROC_PCIE_IMAP0] = 0xc00, + [IPROC_PCIE_IARR2] = 0xd10, + [IPROC_PCIE_IMAP2] = 0xcc0, + [IPROC_PCIE_IARR3] = 0xe00, + [IPROC_PCIE_IMAP3] = 0xe08, + [IPROC_PCIE_IARR4] = 0xe68, + [IPROC_PCIE_IMAP4] = 0xe70, + [IPROC_PCIE_CFG_RD_STATUS] = 0xee0, + [IPROC_PCIE_LINK_STATUS] = 0xf0c, + [IPROC_PCIE_APB_ERR_EN] = 0xf40, + [IPROC_PCIE_ORDERING_CFG] = 0x2000, + [IPROC_PCIE_IMAP0_RO_CONTROL] = 0x201c, + [IPROC_PCIE_IMAP1_RO_CONTROL] = 0x2020, + [IPROC_PCIE_IMAP2_RO_CONTROL] = 0x2024, + [IPROC_PCIE_IMAP3_RO_CONTROL] = 0x2028, + [IPROC_PCIE_IMAP4_RO_CONTROL] = 0x202c, +}; + +/* iProc PCIe PAXC v2 registers */ +static const u16 iproc_pcie_reg_paxc_v2[] = { + [IPROC_PCIE_MSI_GIC_MODE] = 0x050, + [IPROC_PCIE_MSI_BASE_ADDR] = 0x074, + [IPROC_PCIE_MSI_WINDOW_SIZE] = 0x078, + [IPROC_PCIE_MSI_ADDR_LO] = 0x07c, + [IPROC_PCIE_MSI_ADDR_HI] = 0x080, + [IPROC_PCIE_MSI_EN_CFG] = 0x09c, + [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0, + [IPROC_PCIE_CFG_IND_DATA] = 0x1f4, + [IPROC_PCIE_CFG_ADDR] = 0x1f8, + [IPROC_PCIE_CFG_DATA] = 0x1fc, +}; + +/** + * List of device IDs of controllers that have corrupted + * capability list that require SW fixup + */ +static const u16 iproc_pcie_corrupt_cap_did[] = { + 0x16cd, + 0x16f0, + 0xd802, + 0xd804 +}; + +enum iproc_pcie_type { + IPROC_PCIE_PAXB_V2, + IPROC_PCIE_PAXC, + IPROC_PCIE_PAXC_V2, +}; + +/** + * struct iproc_pcie_ob - iProc PCIe outbound mapping + * + * @axi_offset: offset from the AXI address to the internal address used by + * the iProc PCIe core + * @nr_windows: total number of supported outbound mapping windows + */ +struct iproc_pcie_ob { + resource_size_t axi_offset; + unsigned int nr_windows; +}; + +/** + * struct iproc_pcie_ib - iProc PCIe inbound mapping + * + * @nr_regions: total number of supported inbound mapping regions + */ +struct iproc_pcie_ib { + unsigned int nr_regions; +}; + +/** + * struct iproc_pcie_ob_map - outbound mapping controller specific parameters + * + * @window_sizes: list of supported outbound mapping window sizes in MB + * @nr_sizes: number of supported outbound mapping window sizes + */ +struct iproc_pcie_ob_map { + resource_size_t window_sizes[MAX_NUM_OB_WINDOW_SIZES]; + unsigned int nr_sizes; +}; + +static const struct iproc_pcie_ob_map paxb_v2_ob_map[] = { + { + /* OARR0/OMAP0 */ + .window_sizes = { 128, 256 }, + .nr_sizes = 2, + }, + { + /* OARR1/OMAP1 */ + .window_sizes = { 128, 256 }, + .nr_sizes = 2, + }, + { + /* OARR2/OMAP2 */ + .window_sizes = { 128, 256, 512, 1024 }, + .nr_sizes = 4, + }, + { + /* OARR3/OMAP3 */ + .window_sizes = { 128, 256, 512, 1024 }, + .nr_sizes = 4, + }, +}; + +/** + * iProc PCIe inbound mapping type + */ +enum iproc_pcie_ib_map_type { + /* for DDR memory */ + IPROC_PCIE_IB_MAP_MEM = 0, + + /* for device I/O memory */ + IPROC_PCIE_IB_MAP_IO, + + /* invalid or unused */ + IPROC_PCIE_IB_MAP_INVALID +}; + +/** + * struct iproc_pcie_ib_map - inbound mapping controller specific parameters + * + * @type: inbound mapping region type + * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or SZ_1G + * @region_sizes: list of supported inbound mapping region sizes in KB, MB, or + * GB, depedning on the size unit + * @nr_sizes: number of supported inbound mapping region sizes + * @nr_windows: number of supported inbound mapping windows for the region + * @imap_addr_offset: register offset between the upper and lower 32-bit + * IMAP address registers + * @imap_window_offset: register offset between each IMAP window + */ +struct iproc_pcie_ib_map { + enum iproc_pcie_ib_map_type type; + unsigned int size_unit; + resource_size_t region_sizes[MAX_NUM_IB_REGION_SIZES]; + unsigned int nr_sizes; + unsigned int nr_windows; + u16 imap_addr_offset; + u16 imap_window_offset; +}; + +static const struct iproc_pcie_ib_map paxb_v2_ib_map[] = { + { + /* IARR0/IMAP0 */ + .type = IPROC_PCIE_IB_MAP_IO, + .size_unit = SZ_1K, + .region_sizes = { 32 }, + .nr_sizes = 1, + .nr_windows = 8, + .imap_addr_offset = 0x40, + .imap_window_offset = 0x4, + }, + { + /* IARR1/IMAP1 (currently unused) */ + .type = IPROC_PCIE_IB_MAP_INVALID, + }, + { + /* IARR2/IMAP2 */ + .type = IPROC_PCIE_IB_MAP_MEM, + .size_unit = SZ_1M, + .region_sizes = { 64, 128, 256, 512, 1024, 2048, 4096, 8192, + 16384 }, + .nr_sizes = 9, + .nr_windows = 1, + .imap_addr_offset = 0x4, + .imap_window_offset = 0x8, + }, + { + /* IARR3/IMAP3 */ + .type = IPROC_PCIE_IB_MAP_MEM, + .size_unit = SZ_1G, + .region_sizes = { 1, 2, 4, 8, 16, 32 }, + .nr_sizes = 6, + .nr_windows = 8, + .imap_addr_offset = 0x4, + .imap_window_offset = 0x8, + }, + { + /* IARR4/IMAP4 */ + .type = IPROC_PCIE_IB_MAP_MEM, + .size_unit = SZ_1G, + .region_sizes = { 32, 64, 128, 256, 512 }, + .nr_sizes = 5, + .nr_windows = 8, + .imap_addr_offset = 0x4, + .imap_window_offset = 0x8, + }, +}; + +/** + * struct iproc_pcie - iproc pcie device instance + * + * @dev: pointer to pcie udevice + * @base: device I/O base address + * @type: pci device type, PAXC or PAXB + * @reg_offsets: pointer to pcie host register + * @fix_paxc_cap: paxc capability + * @need_ob_cfg: outbound mapping status + * @ob: pcie outbound mapping + * @ob_map: pointer to outbound mapping parameters + * @need_ib_cfg: inbound mapping status + * @ib: pcie inbound mapping + * @ib_map: pointer to inbound mapping parameters + * @ep_is_internal: ep status + * @phy: phy device + * @link_is_active: link up status + * @has_apb_err_disable: apb error status + */ +struct iproc_pcie { + struct udevice *dev; + void __iomem *base; + enum iproc_pcie_type type; + u16 *reg_offsets; + bool fix_paxc_cap; + bool need_ob_cfg; + struct iproc_pcie_ob ob; + const struct iproc_pcie_ob_map *ob_map; + bool need_ib_cfg; + struct iproc_pcie_ib ib; + const struct iproc_pcie_ib_map *ib_map; + bool ep_is_internal; + struct phy phy; + bool link_is_active; + bool has_apb_err_disable; +}; + +static inline bool iproc_pcie_reg_is_invalid(u16 reg_offset) +{ + return !!(reg_offset == IPROC_PCIE_REG_INVALID); +} + +static inline u16 iproc_pcie_reg_offset(struct iproc_pcie *pcie, + enum iproc_pcie_reg reg) +{ + return pcie->reg_offsets[reg]; +} + +static inline u32 iproc_pcie_read_reg(struct iproc_pcie *pcie, + enum iproc_pcie_reg reg) +{ + u16 offset = iproc_pcie_reg_offset(pcie, reg); + + if (iproc_pcie_reg_is_invalid(offset)) + return 0; + + return readl(pcie->base + offset); +} + +static inline void iproc_pcie_write_reg(struct iproc_pcie *pcie, + enum iproc_pcie_reg reg, u32 val) +{ + u16 offset = iproc_pcie_reg_offset(pcie, reg); + + if (iproc_pcie_reg_is_invalid(offset)) + return; + + writel(val, pcie->base + offset); +} + +static int iproc_pcie_map_ep_cfg_reg(const struct udevice *udev, pci_dev_t bdf, + uint where, void **paddress) +{ + struct iproc_pcie *pcie = dev_get_priv(udev); + unsigned int busno = PCI_BUS(bdf); + unsigned int slot = PCI_DEV(bdf); + unsigned int fn = PCI_FUNC(bdf); + + u16 offset; + u32 val; + + /* root complex access */ + if (busno == 0) { + if (slot > 0 || fn > 0) + return -ENODEV; + + iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_IND_ADDR, + where & CFG_IND_ADDR_MASK); + offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_IND_DATA); + if (iproc_pcie_reg_is_invalid(offset)) + return -ENODEV; + + *paddress = (pcie->base + offset); + return 0; + } + + if (!pcie->link_is_active) + return -ENODEV; + + /* EP device access */ + val = (busno << CFG_ADDR_BUS_NUM_SHIFT) | + (slot << CFG_ADDR_DEV_NUM_SHIFT) | + (fn << CFG_ADDR_FUNC_NUM_SHIFT) | + (where & CFG_ADDR_REG_NUM_MASK) | + (1 & CFG_ADDR_CFG_TYPE_MASK); + + iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val); + offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA); + + if (iproc_pcie_reg_is_invalid(offset)) + return -ENODEV; + + *paddress = (pcie->base + offset); + + return 0; +} + +static void iproc_pcie_fix_cap(struct iproc_pcie *pcie, int where, ulong *val) +{ + u32 i, dev_id; + + switch (where & ~0x3) { + case PCI_VENDOR_ID: + dev_id = *val >> 16; + + /* + * Activate fixup for those controllers that have corrupted + * capability list registers + */ + for (i = 0; i < ARRAY_SIZE(iproc_pcie_corrupt_cap_did); i++) + if (dev_id == iproc_pcie_corrupt_cap_did[i]) + pcie->fix_paxc_cap = true; + break; + + case IPROC_PCI_PM_CAP: + if (pcie->fix_paxc_cap) { + /* advertise PM, force next capability to PCIe */ + *val &= ~IPROC_PCI_PM_CAP_MASK; + *val |= IPROC_PCI_EXP_CAP << 8 | PCI_CAP_ID_PM; + } + break; + + case IPROC_PCI_EXP_CAP: + if (pcie->fix_paxc_cap) { + /* advertise root port, version 2, terminate here */ + *val = (PCI_EXP_TYPE_ROOT_PORT << 4 | 2) << 16 | + PCI_CAP_ID_EXP; + } + break; + + case IPROC_PCI_EXP_CAP + PCI_EXP_RTCTL: + /* Don't advertise CRS SV support */ + *val &= ~(PCI_EXP_RTCAP_CRSVIS << 16); + break; + + default: + break; + } +} + +static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie, + unsigned int devfn, int where, + int size, u32 *val) +{ + void __iomem *addr; + int ret; + + ret = iproc_pcie_map_ep_cfg_reg(pcie->dev, devfn, where & ~0x3, &addr); + if (ret) { + *val = ~0; + return -EINVAL; + } + + *val = readl(addr); + + if (size <= 2) + *val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1); + + return 0; +} + +static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie, + unsigned int devfn, int where, + int size, u32 val) +{ + void __iomem *addr; + int ret; + u32 mask, tmp; + + ret = iproc_pcie_map_ep_cfg_reg(pcie->dev, devfn, where & ~0x3, &addr); + if (ret) + return -EINVAL; + + if (size == 4) { + writel(val, addr); + return 0; + } + + mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); + tmp = readl(addr) & mask; + tmp |= val << ((where & 0x3) * 8); + writel(tmp, addr); + return 0; +} + +/** + * iproc_pcie_apb_err_disable() - configure apb error + * + * APB error forwarding can be disabled during access of configuration + * registers of the endpoint device, to prevent unsupported requests + * (typically seen during enumeration with multi-function devices) from + * triggering a system exception. + * + * @bus: pcie udevice + * @bdf: pdf value + * @disabled: flag to enable/disabled apb error + */ +static inline void iproc_pcie_apb_err_disable(const struct udevice *bus, + pci_dev_t bdf, bool disable) +{ + struct iproc_pcie *pcie = dev_get_priv(bus); + u32 val; + + if (PCI_BUS(bdf) && pcie->has_apb_err_disable) { + val = iproc_pcie_read_reg(pcie, IPROC_PCIE_APB_ERR_EN); + if (disable) + val &= ~APB_ERR_EN; + else + val |= APB_ERR_EN; + iproc_pcie_write_reg(pcie, IPROC_PCIE_APB_ERR_EN, val); + } +} + +static int iproc_pcie_config_read32(const struct udevice *bus, pci_dev_t bdf, + uint offset, ulong *valuep, + enum pci_size_t size) +{ + struct iproc_pcie *pcie = dev_get_priv(bus); + int ret; + ulong data; + + iproc_pcie_apb_err_disable(bus, bdf, true); + ret = pci_generic_mmap_read_config(bus, iproc_pcie_map_ep_cfg_reg, + bdf, offset, &data, PCI_SIZE_32); + iproc_pcie_apb_err_disable(bus, bdf, false); + if (size <= PCI_SIZE_16) + *valuep = (data >> (8 * (offset & 3))) & + ((1 << (BIT(size) * 8)) - 1); + else + *valuep = data; + + if (!ret && PCI_BUS(bdf) == 0) + iproc_pcie_fix_cap(pcie, offset, valuep); + + return ret; +} + +static int iproc_pcie_config_write32(struct udevice *bus, pci_dev_t bdf, + uint offset, ulong value, + enum pci_size_t size) +{ + void *addr; + ulong mask, tmp; + int ret; + + ret = iproc_pcie_map_ep_cfg_reg(bus, bdf, offset, &addr); + if (ret) + return ret; + + if (size == PCI_SIZE_32) { + writel(value, addr); + return ret; + } + + iproc_pcie_apb_err_disable(bus, bdf, true); + mask = ~(((1 << (BIT(size) * 8)) - 1) << ((offset & 0x3) * 8)); + tmp = readl(addr) & mask; + tmp |= (value << ((offset & 0x3) * 8)); + writel(tmp, addr); + iproc_pcie_apb_err_disable(bus, bdf, false); + + return ret; +} + +const static struct dm_pci_ops iproc_pcie_ops = { + .read_config = iproc_pcie_config_read32, + .write_config = iproc_pcie_config_write32, +}; + +static int iproc_pcie_rev_init(struct iproc_pcie *pcie) +{ + unsigned int reg_idx; + const u16 *regs; + u16 num_elements; + + switch (pcie->type) { + case IPROC_PCIE_PAXC_V2: + pcie->ep_is_internal = true; + regs = iproc_pcie_reg_paxc_v2; + num_elements = ARRAY_SIZE(iproc_pcie_reg_paxc_v2); + break; + case IPROC_PCIE_PAXB_V2: + regs = iproc_pcie_reg_paxb_v2; + num_elements = ARRAY_SIZE(iproc_pcie_reg_paxb_v2); + pcie->has_apb_err_disable = true; + if (pcie->need_ob_cfg) { + pcie->ob.axi_offset = 0; + pcie->ob_map = paxb_v2_ob_map; + pcie->ob.nr_windows = ARRAY_SIZE(paxb_v2_ob_map); + } + pcie->need_ib_cfg = true; + pcie->ib.nr_regions = ARRAY_SIZE(paxb_v2_ib_map); + pcie->ib_map = paxb_v2_ib_map; + break; + default: + dev_dbg(pcie->dev, "incompatible iProc PCIe interface\n"); + return -EINVAL; + } + + pcie->reg_offsets = calloc(IPROC_PCIE_MAX_NUM_REG, + sizeof(*pcie->reg_offsets)); + if (!pcie->reg_offsets) + return -ENOMEM; + + /* go through the register table and populate all valid registers */ + pcie->reg_offsets[0] = (pcie->type == IPROC_PCIE_PAXC_V2) ? + IPROC_PCIE_REG_INVALID : regs[0]; + for (reg_idx = 1; reg_idx < num_elements; reg_idx++) + pcie->reg_offsets[reg_idx] = regs[reg_idx] ? + regs[reg_idx] : IPROC_PCIE_REG_INVALID; + + return 0; +} + +static inline bool iproc_pcie_ob_is_valid(struct iproc_pcie *pcie, + int window_idx) +{ + u32 val; + + val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_OARR0, window_idx)); + + return !!(val & OARR_VALID); +} + +static inline int iproc_pcie_ob_write(struct iproc_pcie *pcie, int window_idx, + int size_idx, u64 axi_addr, u64 pci_addr) +{ + u16 oarr_offset, omap_offset; + + /* + * Derive the OARR/OMAP offset from the first pair (OARR0/OMAP0) based + * on window index. + */ + oarr_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OARR0, + window_idx)); + omap_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OMAP0, + window_idx)); + if (iproc_pcie_reg_is_invalid(oarr_offset) || + iproc_pcie_reg_is_invalid(omap_offset)) + return -EINVAL; + + /* + * Program the OARR registers. The upper 32-bit OARR register is + * always right after the lower 32-bit OARR register. + */ + writel(lower_32_bits(axi_addr) | (size_idx << OARR_SIZE_CFG_SHIFT) | + OARR_VALID, pcie->base + oarr_offset); + writel(upper_32_bits(axi_addr), pcie->base + oarr_offset + 4); + + /* now program the OMAP registers */ + writel(lower_32_bits(pci_addr), pcie->base + omap_offset); + writel(upper_32_bits(pci_addr), pcie->base + omap_offset + 4); + + debug("ob window [%d]: offset 0x%x axi %pap pci %pap\n", + window_idx, oarr_offset, &axi_addr, &pci_addr); + debug("oarr lo 0x%x oarr hi 0x%x\n", + readl(pcie->base + oarr_offset), + readl(pcie->base + oarr_offset + 4)); + debug("omap lo 0x%x omap hi 0x%x\n", + readl(pcie->base + omap_offset), + readl(pcie->base + omap_offset + 4)); + + return 0; +} + +/** + * iproc_pcie_setup_ob() - setup outbound address mapping + * + * Some iProc SoCs require the SW to configure the outbound address mapping + * Outbound address translation: + * + * iproc_pcie_address = axi_address - axi_offset + * OARR = iproc_pcie_address + * OMAP = pci_addr + * axi_addr -> iproc_pcie_address -> OARR -> OMAP -> pci_address + * + * @pcie: pcie device + * @axi_addr: axi address to be translated + * @pci_addr: pci address + * @size: window size + * + * @return: 0 on success and -ve on failure + */ +static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr, + u64 pci_addr, resource_size_t size) +{ + struct iproc_pcie_ob *ob = &pcie->ob; + int ret = -EINVAL, window_idx, size_idx; + + if (axi_addr < ob->axi_offset) { + pr_err("axi address %pap less than offset %pap\n", + &axi_addr, &ob->axi_offset); + return -EINVAL; + } + + /* + * Translate the AXI address to the internal address used by the iProc + * PCIe core before programming the OARR + */ + axi_addr -= ob->axi_offset; + + /* iterate through all OARR/OMAP mapping windows */ + for (window_idx = ob->nr_windows - 1; window_idx >= 0; window_idx--) { + const struct iproc_pcie_ob_map *ob_map = + &pcie->ob_map[window_idx]; + + /* + * If current outbound window is already in use, move on to the + * next one. + */ + if (iproc_pcie_ob_is_valid(pcie, window_idx)) + continue; + + /* + * Iterate through all supported window sizes within the + * OARR/OMAP pair to find a match. Go through the window sizes + * in a descending order. + */ + for (size_idx = ob_map->nr_sizes - 1; size_idx >= 0; + size_idx--) { + resource_size_t window_size = + ob_map->window_sizes[size_idx] * SZ_1M; + + /* + * Keep iterating until we reach the last window and + * with the minimal window size at index zero. In this + * case, we take a compromise by mapping it using the + * minimum window size that can be supported + */ + if (size < window_size) { + if (size_idx > 0 || window_idx > 0) + continue; + + /* + * For the corner case of reaching the minimal + * window size that can be supported on the + * last window + */ + axi_addr = ALIGN_DOWN(axi_addr, window_size); + pci_addr = ALIGN_DOWN(pci_addr, window_size); + size = window_size; + } + + if (!IS_ALIGNED(axi_addr, window_size) || + !IS_ALIGNED(pci_addr, window_size)) { + pr_err("axi %pap or pci %pap not aligned\n", + &axi_addr, &pci_addr); + return -EINVAL; + } + + /* + * Match found! Program both OARR and OMAP and mark + * them as a valid entry. + */ + ret = iproc_pcie_ob_write(pcie, window_idx, size_idx, + axi_addr, pci_addr); + if (ret) + goto err_ob; + + size -= window_size; + if (size == 0) + return 0; + + /* + * If we are here, we are done with the current window, + * but not yet finished all mappings. Need to move on + * to the next window. + */ + axi_addr += window_size; + pci_addr += window_size; + break; + } + } + +err_ob: + pr_err("unable to configure outbound mapping\n"); + pr_err("axi %pap, axi offset %pap, pci %pap, res size %pap\n", + &axi_addr, &ob->axi_offset, &pci_addr, &size); + + return ret; +} + +static int iproc_pcie_map_ranges(struct udevice *dev) +{ + struct iproc_pcie *pcie = dev_get_priv(dev); + struct udevice *bus = pci_get_controller(dev); + struct pci_controller *hose = dev_get_uclass_priv(bus); + int i, ret; + + for (i = 0; i < hose->region_count; i++) { + if (hose->regions[i].flags == PCI_REGION_MEM || + hose->regions[i].flags == PCI_REGION_PREFETCH) { + debug("%d: bus_addr %p, axi_addr %p, size 0x%lx\n", + i, &hose->regions[i].bus_start, + &hose->regions[i].phys_start, + hose->regions[i].size); + ret = iproc_pcie_setup_ob(pcie, + hose->regions[i].phys_start, + hose->regions[i].bus_start, + hose->regions[i].size); + if (ret) + return ret; + } + } + + return 0; +} + +static inline bool iproc_pcie_ib_is_in_use(struct iproc_pcie *pcie, + int region_idx) +{ + const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx]; + u32 val; + + val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_IARR0, region_idx)); + + return !!(val & (BIT(ib_map->nr_sizes) - 1)); +} + +static inline bool +iproc_pcie_ib_check_type(const struct iproc_pcie_ib_map *ib_map, + enum iproc_pcie_ib_map_type type) +{ + return !!(ib_map->type == type); +} + +static int iproc_pcie_ib_write(struct iproc_pcie *pcie, int region_idx, + int size_idx, int nr_windows, u64 axi_addr, + u64 pci_addr, resource_size_t size) +{ + const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx]; + u16 iarr_offset, imap_offset; + u32 val; + int window_idx; + + iarr_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_IARR0, + region_idx)); + imap_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_IMAP0, + region_idx)); + if (iproc_pcie_reg_is_invalid(iarr_offset) || + iproc_pcie_reg_is_invalid(imap_offset)) + return -EINVAL; + + debug("ib region [%d]: offset 0x%x axi %pap pci %pap\n", + region_idx, iarr_offset, &axi_addr, &pci_addr); + + /* + * Program the IARR registers. The upper 32-bit IARR register is + * always right after the lower 32-bit IARR register. + */ + writel(lower_32_bits(pci_addr) | BIT(size_idx), + pcie->base + iarr_offset); + writel(upper_32_bits(pci_addr), pcie->base + iarr_offset + 4); + + debug("iarr lo 0x%x iarr hi 0x%x\n", + readl(pcie->base + iarr_offset), + readl(pcie->base + iarr_offset + 4)); + + /* + * Now program the IMAP registers. Each IARR region may have one or + * more IMAP windows. + */ + size >>= ilog2(nr_windows); + for (window_idx = 0; window_idx < nr_windows; window_idx++) { + val = readl(pcie->base + imap_offset); + val |= lower_32_bits(axi_addr) | IMAP_VALID; + writel(val, pcie->base + imap_offset); + writel(upper_32_bits(axi_addr), + pcie->base + imap_offset + ib_map->imap_addr_offset); + + debug("imap window [%d] lo 0x%x hi 0x%x\n", + window_idx, readl(pcie->base + imap_offset), + readl(pcie->base + imap_offset + + ib_map->imap_addr_offset)); + + imap_offset += ib_map->imap_window_offset; + axi_addr += size; + } + + return 0; +} + +/** + * iproc_pcie_setup_ib() - setup inbound address mapping + * + * @pcie: pcie device + * @axi_addr: axi address to be translated + * @pci_addr: pci address + * @size: window size + * @type: inbound mapping type + * + * @return: 0 on success and -ve on failure + */ +static int iproc_pcie_setup_ib(struct iproc_pcie *pcie, u64 axi_addr, + u64 pci_addr, resource_size_t size, + enum iproc_pcie_ib_map_type type) +{ + struct iproc_pcie_ib *ib = &pcie->ib; + int ret; + unsigned int region_idx, size_idx; + + /* iterate through all IARR mapping regions */ + for (region_idx = 0; region_idx < ib->nr_regions; region_idx++) { + const struct iproc_pcie_ib_map *ib_map = + &pcie->ib_map[region_idx]; + + /* + * If current inbound region is already in use or not a + * compatible type, move on to the next. + */ + if (iproc_pcie_ib_is_in_use(pcie, region_idx) || + !iproc_pcie_ib_check_type(ib_map, type)) + continue; + + /* iterate through all supported region sizes to find a match */ + for (size_idx = 0; size_idx < ib_map->nr_sizes; size_idx++) { + resource_size_t region_size = + ib_map->region_sizes[size_idx] * ib_map->size_unit; + + if (size != region_size) + continue; + + if (!IS_ALIGNED(axi_addr, region_size) || + !IS_ALIGNED(pci_addr, region_size)) { + pr_err("axi %pap or pci %pap not aligned\n", + &axi_addr, &pci_addr); + return -EINVAL; + } + + /* Match found! Program IARR and all IMAP windows. */ + ret = iproc_pcie_ib_write(pcie, region_idx, size_idx, + ib_map->nr_windows, axi_addr, + pci_addr, size); + if (ret) + goto err_ib; + else + return 0; + } + } + ret = -EINVAL; + +err_ib: + pr_err("unable to configure inbound mapping\n"); + pr_err("axi %pap, pci %pap, res size %pap\n", + &axi_addr, &pci_addr, &size); + + return ret; +} + +static int iproc_pcie_map_dma_ranges(struct iproc_pcie *pcie) +{ + int ret; + struct pci_region regions; + int i = 0; + + while (!pci_get_dma_regions(pcie->dev, ®ions, i)) { + dev_dbg(pcie->dev, + "dma %d: bus_addr %#lx, axi_addr %#llx, size %#lx\n", + i, regions.bus_start, regions.phys_start, regions.size); + + /* Each range entry corresponds to an inbound mapping region */ + ret = iproc_pcie_setup_ib(pcie, regions.phys_start, + regions.bus_start, + regions.size, + IPROC_PCIE_IB_MAP_MEM); + if (ret) + return ret; + i++; + } + return 0; +} + +static void iproc_pcie_reset_map_regs(struct iproc_pcie *pcie) +{ + struct iproc_pcie_ib *ib = &pcie->ib; + struct iproc_pcie_ob *ob = &pcie->ob; + int window_idx, region_idx; + + if (pcie->ep_is_internal) + return; + + /* iterate through all OARR mapping regions */ + for (window_idx = ob->nr_windows - 1; window_idx >= 0; window_idx--) { + iproc_pcie_write_reg(pcie, MAP_REG(IPROC_PCIE_OARR0, + window_idx), 0); + } + + /* iterate through all IARR mapping regions */ + for (region_idx = 0; region_idx < ib->nr_regions; region_idx++) { + iproc_pcie_write_reg(pcie, MAP_REG(IPROC_PCIE_IARR0, + region_idx), 0); + } +} + +static void iproc_pcie_reset(struct iproc_pcie *pcie) +{ + u32 val; + + /* + * PAXC and the internal emulated endpoint device downstream should not + * be reset. If firmware has been loaded on the endpoint device at an + * earlier boot stage, reset here causes issues. + */ + if (pcie->ep_is_internal) + return; + + /* + * Select perst_b signal as reset source. Put the device into reset, + * and then bring it out of reset + */ + val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL); + val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST & + ~RC_PCIE_RST_OUTPUT; + iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); + udelay(250); + + val |= RC_PCIE_RST_OUTPUT; + iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val); + mdelay(100); +} + +static inline bool iproc_pcie_link_is_active(struct iproc_pcie *pcie) +{ + u32 val; + + val = iproc_pcie_read_reg(pcie, IPROC_PCIE_LINK_STATUS); + return !!((val & PCIE_PHYLINKUP) && (val & PCIE_DL_ACTIVE)); +} + +static int iproc_pcie_check_link(struct iproc_pcie *pcie) +{ + u32 link_status, class; + + pcie->link_is_active = false; + /* force class to PCI_CLASS_BRIDGE_PCI (0x0604) */ +#define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c +#define PCI_CLASS_BRIDGE_MASK 0xffff00 +#define PCI_CLASS_BRIDGE_SHIFT 8 + iproc_pci_raw_config_read32(pcie, 0, + PCI_BRIDGE_CTRL_REG_OFFSET, + 4, &class); + class &= ~PCI_CLASS_BRIDGE_MASK; + class |= (PCI_CLASS_BRIDGE_PCI << PCI_CLASS_BRIDGE_SHIFT); + iproc_pci_raw_config_write32(pcie, 0, + PCI_BRIDGE_CTRL_REG_OFFSET, + 4, class); + + /* + * PAXC connects to emulated endpoint devices directly and does not + * have a Serdes. Therefore skip the link detection logic here. + */ + if (pcie->ep_is_internal) { + pcie->link_is_active = true; + return 0; + } + + if (!iproc_pcie_link_is_active(pcie)) { + pr_err("PHY or data link is INACTIVE!\n"); + return -ENODEV; + } + +#define PCI_TARGET_LINK_SPEED_MASK 0xf +#define PCI_TARGET_LINK_WIDTH_MASK 0x3f +#define PCI_TARGET_LINK_WIDTH_OFFSET 0x4 + + /* check link status to see if link is active */ + iproc_pci_raw_config_read32(pcie, 0, + IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, + 2, &link_status); + if (link_status & PCI_EXP_LNKSTA_NLW) + pcie->link_is_active = true; + + if (pcie->link_is_active) + pr_info("link UP @ Speed Gen-%d and width-x%d\n", + link_status & PCI_TARGET_LINK_SPEED_MASK, + (link_status >> PCI_TARGET_LINK_WIDTH_OFFSET) & + PCI_TARGET_LINK_WIDTH_MASK); + else + pr_info("link DOWN\n"); + + return 0; +} + +static int iproc_pcie_probe(struct udevice *dev) +{ + struct iproc_pcie *pcie = dev_get_priv(dev); + int ret; + + pcie->type = (enum iproc_pcie_type)dev_get_driver_data(dev); + debug("PAX type %d\n", pcie->type); + pcie->base = dev_read_addr_ptr(dev); + debug("PAX reg base %p\n", pcie->base); + + if (!pcie->base) + return -ENODEV; + + if (dev_read_bool(dev, "brcm,pcie-ob")) + pcie->need_ob_cfg = true; + + pcie->dev = dev; + ret = iproc_pcie_rev_init(pcie); + if (ret) + return ret; + + if (!pcie->ep_is_internal) { + ret = generic_phy_get_by_name(dev, "pcie-phy", &pcie->phy); + if (!ret) { + ret = generic_phy_init(&pcie->phy); + if (ret) { + pr_err("failed to init %s PHY\n", dev->name); + return ret; + } + + ret = generic_phy_power_on(&pcie->phy); + if (ret) { + pr_err("power on %s PHY failed\n", dev->name); + goto err_exit_phy; + } + } + } + + iproc_pcie_reset(pcie); + + if (pcie->need_ob_cfg) { + ret = iproc_pcie_map_ranges(dev); + if (ret) { + pr_err("outbound map failed\n"); + goto err_power_off_phy; + } + } + + if (pcie->need_ib_cfg) { + ret = iproc_pcie_map_dma_ranges(pcie); + if (ret) { + pr_err("inbound map failed\n"); + goto err_power_off_phy; + } + } + + if (iproc_pcie_check_link(pcie)) + pr_info("no PCIe EP device detected\n"); + + return 0; + +err_power_off_phy: + generic_phy_power_off(&pcie->phy); +err_exit_phy: + generic_phy_exit(&pcie->phy); + return ret; +} + +static int iproc_pcie_remove(struct udevice *dev) +{ + struct iproc_pcie *pcie = dev_get_priv(dev); + int ret; + + iproc_pcie_reset_map_regs(pcie); + + if (generic_phy_valid(&pcie->phy)) { + ret = generic_phy_power_off(&pcie->phy); + if (ret) { + pr_err("failed to power off PCIe phy\n"); + return ret; + } + + ret = generic_phy_exit(&pcie->phy); + if (ret) { + pr_err("failed to power off PCIe phy\n"); + return ret; + } + } + + return 0; +} + +static const struct udevice_id pci_iproc_ids[] = { + { .compatible = "brcm,iproc-pcie-paxb-v2", + .data = IPROC_PCIE_PAXB_V2 }, + { .compatible = "brcm,iproc-pcie-paxc-v2", + .data = IPROC_PCIE_PAXC_V2 }, + { } +}; + +U_BOOT_DRIVER(pci_iproc) = { + .name = "pci_iproc", + .id = UCLASS_PCI, + .of_match = pci_iproc_ids, + .ops = &iproc_pcie_ops, + .probe = iproc_pcie_probe, + .remove = iproc_pcie_remove, + .priv_auto_alloc_size = sizeof(struct iproc_pcie), + .flags = DM_REMOVE_OS_PREPARE, +}; diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index e146ffc5f86..e344677f91f 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -822,6 +822,13 @@ config MPC8XX_CONS depends on MPC8xx default y +config XEN_SERIAL + bool "XEN serial support" + depends on XEN + help + If built without DM support, then requires Xen + to be built with CONFIG_VERBOSE_DEBUG. + choice prompt "Console port" default 8xx_CONS_SMC1 diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index e4a92bbbb71..25f7f8d342c 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_OWL_SERIAL) += serial_owl.o obj-$(CONFIG_OMAP_SERIAL) += serial_omap.o obj-$(CONFIG_MTK_SERIAL) += serial_mtk.o obj-$(CONFIG_SIFIVE_SERIAL) += serial_sifive.o +obj-$(CONFIG_XEN_SERIAL) += serial_xen.o ifndef CONFIG_SPL_BUILD obj-$(CONFIG_USB_TTY) += usbtty.o diff --git a/drivers/serial/serial_xen.c b/drivers/serial/serial_xen.c new file mode 100644 index 00000000000..ba6504b9479 --- /dev/null +++ b/drivers/serial/serial_xen.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) 2018 NXP + * (C) 2020 EPAM Systems Inc. + */ +#include <common.h> +#include <cpu_func.h> +#include <dm.h> +#include <serial.h> +#include <watchdog.h> + +#include <linux/bug.h> + +#include <xen/hvm.h> +#include <xen/events.h> + +#include <xen/interface/sched.h> +#include <xen/interface/hvm/hvm_op.h> +#include <xen/interface/hvm/params.h> +#include <xen/interface/io/console.h> +#include <xen/interface/io/ring.h> + +DECLARE_GLOBAL_DATA_PTR; + +u32 console_evtchn; + +/* + * struct xen_uart_priv - Structure representing a Xen UART info + * @intf: Console I/O interface for Xen guest OSes + * @evtchn: Console event channel + */ +struct xen_uart_priv { + struct xencons_interface *intf; + u32 evtchn; +}; + +int xen_serial_setbrg(struct udevice *dev, int baudrate) +{ + return 0; +} + +static int xen_serial_probe(struct udevice *dev) +{ + struct xen_uart_priv *priv = dev_get_priv(dev); + u64 val = 0; + unsigned long gfn; + int ret; + + ret = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &val); + if (ret < 0 || val == 0) + return ret; + + priv->evtchn = val; + console_evtchn = val; + + ret = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &val); + if (ret < 0) + return ret; + + if (!val) + return -EINVAL; + + gfn = val; + priv->intf = (struct xencons_interface *)(gfn << XEN_PAGE_SHIFT); + + return 0; +} + +static int xen_serial_pending(struct udevice *dev, bool input) +{ + struct xen_uart_priv *priv = dev_get_priv(dev); + struct xencons_interface *intf = priv->intf; + + if (!input || intf->in_cons == intf->in_prod) + return 0; + + return 1; +} + +static int xen_serial_getc(struct udevice *dev) +{ + struct xen_uart_priv *priv = dev_get_priv(dev); + struct xencons_interface *intf = priv->intf; + XENCONS_RING_IDX cons; + char c; + + while (intf->in_cons == intf->in_prod) + mb(); /* wait */ + + cons = intf->in_cons; + mb(); /* get pointers before reading ring */ + + c = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; + + mb(); /* read ring before consuming */ + intf->in_cons = cons; + + notify_remote_via_evtchn(priv->evtchn); + + return c; +} + +static int __write_console(struct udevice *dev, const char *data, int len) +{ + struct xen_uart_priv *priv = dev_get_priv(dev); + struct xencons_interface *intf = priv->intf; + XENCONS_RING_IDX cons, prod; + int sent = 0; + + cons = intf->out_cons; + prod = intf->out_prod; + mb(); /* Update pointer */ + + WARN_ON((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + mb(); /* Update data before pointer */ + intf->out_prod = prod; + + if (sent) + notify_remote_via_evtchn(priv->evtchn); + + return sent; +} + +static int write_console(struct udevice *dev, const char *data, int len) +{ + /* + * Make sure the whole buffer is emitted, polling if + * necessary. We don't ever want to rely on the hvc daemon + * because the most interesting console output is when the + * kernel is crippled. + */ + while (len) { + int sent = __write_console(dev, data, len); + + data += sent; + len -= sent; + + if (unlikely(len)) + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + } + + return 0; +} + +static int xen_serial_putc(struct udevice *dev, const char ch) +{ + write_console(dev, &ch, 1); + + return 0; +} + +static const struct dm_serial_ops xen_serial_ops = { + .putc = xen_serial_putc, + .getc = xen_serial_getc, + .pending = xen_serial_pending, +}; + +#if CONFIG_IS_ENABLED(OF_CONTROL) +static const struct udevice_id xen_serial_ids[] = { + { .compatible = "xen,xen" }, + { } +}; +#endif + +U_BOOT_DRIVER(serial_xen) = { + .name = "serial_xen", + .id = UCLASS_SERIAL, +#if CONFIG_IS_ENABLED(OF_CONTROL) + .of_match = xen_serial_ids, +#endif + .priv_auto_alloc_size = sizeof(struct xen_uart_priv), + .probe = xen_serial_probe, + .ops = &xen_serial_ops, +#if !CONFIG_IS_ENABLED(OF_CONTROL) + .flags = DM_FLAG_PRE_RELOC, +#endif +}; + diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index a939918e973..a8e8bfc04b4 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -33,7 +33,6 @@ #define USB_NET_NAME "usb_ether" -#define atomic_read extern struct platform_data brd; diff --git a/drivers/usb/musb-new/linux-compat.h b/drivers/usb/musb-new/linux-compat.h index 733b197f593..6d9f19dfe6b 100644 --- a/drivers/usb/musb-new/linux-compat.h +++ b/drivers/usb/musb-new/linux-compat.h @@ -10,10 +10,6 @@ #define platform_data device_data -#ifndef wmb -#define wmb() asm volatile ("" : : : "memory") -#endif - #define msleep(a) udelay(a * 1000) /* diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig new file mode 100644 index 00000000000..6ad2a936682 --- /dev/null +++ b/drivers/xen/Kconfig @@ -0,0 +1,10 @@ +config PVBLOCK + bool "Xen para-virtualized block device" + depends on DM + select BLK + select HAVE_BLOCK_DEVICE + help + This driver implements the front-end of the Xen virtual + block device driver. It communicates with a back-end driver + in another domain which drives the actual block device. + diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile new file mode 100644 index 00000000000..87157df69b8 --- /dev/null +++ b/drivers/xen/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# (C) Copyright 2020 EPAM Systems Inc. + +obj-y += hypervisor.o +obj-y += events.o +obj-y += xenbus.o +obj-y += gnttab.o + +obj-$(CONFIG_PVBLOCK) += pvblock.o diff --git a/drivers/xen/events.c b/drivers/xen/events.c new file mode 100644 index 00000000000..c490f87b2fc --- /dev/null +++ b/drivers/xen/events.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + * (C) 2020 - EPAM Systems Inc. + * + * File: events.c [1] + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Description: Deals with events received on event channels + * + * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary + */ +#include <common.h> +#include <log.h> + +#include <asm/io.h> +#include <asm/xen/system.h> + +#include <xen/events.h> +#include <xen/hvm.h> + +extern u32 console_evtchn; + +#define NR_EVS 1024 + +/** + * struct _ev_action - represents a event handler. + * + * Chaining or sharing is not allowed + */ +struct _ev_action { + void (*handler)(evtchn_port_t port, struct pt_regs *regs, void *data); + void *data; + u32 count; +}; + +static struct _ev_action ev_actions[NR_EVS]; +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *data); + +static unsigned long bound_ports[NR_EVS / (8 * sizeof(unsigned long))]; + +void unbind_all_ports(void) +{ + int i; + int cpu = 0; + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = &s->vcpu_info[cpu]; + + for (i = 0; i < NR_EVS; i++) { + if (i == console_evtchn) + continue; + if (test_and_clear_bit(i, bound_ports)) { + printf("port %d still bound!\n", i); + unbind_evtchn(i); + } + } + vcpu_info->evtchn_upcall_pending = 0; + vcpu_info->evtchn_pending_sel = 0; +} + +int do_event(evtchn_port_t port, struct pt_regs *regs) +{ + struct _ev_action *action; + + clear_evtchn(port); + + if (port >= NR_EVS) { + printk("WARN: do_event(): Port number too large: %d\n", port); + return 1; + } + + action = &ev_actions[port]; + action->count++; + + /* call the handler */ + action->handler(port, regs, action->data); + + return 1; +} + +evtchn_port_t bind_evtchn(evtchn_port_t port, + void (*handler)(evtchn_port_t, struct pt_regs *, void *), + void *data) +{ + if (ev_actions[port].handler != default_handler) + printf("WARN: Handler for port %d already registered, replacing\n", + port); + + ev_actions[port].data = data; + wmb(); + ev_actions[port].handler = handler; + synch_set_bit(port, bound_ports); + + return port; +} + +/** + * unbind_evtchn() - Unbind event channel for selected port + */ +void unbind_evtchn(evtchn_port_t port) +{ + struct evtchn_close close; + int rc; + + if (ev_actions[port].handler == default_handler) + debug("Default handler for port %d when unbinding\n", port); + mask_evtchn(port); + clear_evtchn(port); + + ev_actions[port].handler = default_handler; + wmb(); + ev_actions[port].data = NULL; + synch_clear_bit(port, bound_ports); + + close.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (rc) + printf("WARN: close_port %d failed rc=%d. ignored\n", port, rc); +} + +void default_handler(evtchn_port_t port, struct pt_regs *regs, void *ignore) +{ + debug("[Port %d] - event received\n", port); +} + +/** + * evtchn_alloc_unbound() - Create a port available to the pal for + * exchanging notifications. + * + * Unfortunate confusion of terminology: the port is unbound as far + * as Xen is concerned, but we automatically bind a handler to it. + * + * Return: The result of the hypervisor call. + */ +int evtchn_alloc_unbound(domid_t pal, + void (*handler)(evtchn_port_t, struct pt_regs *, void *), + void *data, evtchn_port_t *port) +{ + int rc; + + struct evtchn_alloc_unbound op; + + op.dom = DOMID_SELF; + op.remote_dom = pal; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); + if (rc) { + printf("ERROR: alloc_unbound failed with rc=%d", rc); + return rc; + } + if (!handler) + handler = default_handler; + *port = bind_evtchn(op.port, handler, data); + return rc; +} + +/** + * eventchn_poll() - Event channel polling function + * + * Check and process any pending events + */ +void eventchn_poll(void) +{ + do_hypervisor_callback(NULL); +} + +/** + * init_events() - Initialize event handler + * + * Initially all events are without a handler and disabled. + */ +void init_events(void) +{ + int i; + + debug("%s\n", __func__); + + for (i = 0; i < NR_EVS; i++) { + ev_actions[i].handler = default_handler; + mask_evtchn(i); + } +} + +/** + * fini_events() - Close all ports + * + * Mask and clear event channels. Close port using EVTCHNOP_close + * hypercall. + */ +void fini_events(void) +{ + debug("%s\n", __func__); + /* Dealloc all events */ + unbind_all_ports(); +} + diff --git a/drivers/xen/gnttab.c b/drivers/xen/gnttab.c new file mode 100644 index 00000000000..becf7a79fbf --- /dev/null +++ b/drivers/xen/gnttab.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) 2006 - Cambridge University + * (C) 2020 - EPAM Systems Inc. + * + * File: gnttab.c [1] + * Author: Steven Smith (sos22@cam.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: July 2006 + * + * Description: Simple grant tables implementation. About as stupid as it's + * possible to be and still work. + * + * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary + */ +#include <common.h> +#include <linux/compiler.h> +#include <log.h> +#include <malloc.h> + +#include <asm/armv8/mmu.h> +#include <asm/io.h> +#include <asm/xen/system.h> + +#include <linux/bug.h> + +#include <xen/gnttab.h> +#include <xen/hvm.h> + +#include <xen/interface/memory.h> + +DECLARE_GLOBAL_DATA_PTR; + +#define NR_RESERVED_ENTRIES 8 + +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ +#define NR_GRANT_FRAMES 1 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(struct grant_entry_v1)) + +static struct grant_entry_v1 *gnttab_table; +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; + +static void put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + + local_irq_save(flags); + gnttab_list[ref] = gnttab_list[0]; + gnttab_list[0] = ref; + local_irq_restore(flags); +} + +static grant_ref_t get_free_entry(void) +{ + unsigned int ref; + unsigned long flags; + + local_irq_save(flags); + ref = gnttab_list[0]; + BUG_ON(ref < NR_RESERVED_ENTRIES || ref >= NR_GRANT_ENTRIES); + gnttab_list[0] = gnttab_list[ref]; + local_irq_restore(flags); + return ref; +} + +/** + * gnttab_grant_access() - Allow access to the given frame. + * The function creates an entry in the grant table according + * to the specified parameters. + * @domid: the id of the domain for which access is allowed + * @frame: the number of the shared frame + * @readonly: determines whether the frame is shared read-only or read-write + * + * Return: relevant grant reference + */ +grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, int readonly) +{ + grant_ref_t ref; + + ref = get_free_entry(); + gnttab_table[ref].frame = frame; + gnttab_table[ref].domid = domid; + wmb(); + readonly *= GTF_readonly; + gnttab_table[ref].flags = GTF_permit_access | readonly; + + return ref; +} + +/** + * gnttab_end_access() - End of memory sharing. The function invalidates + * the entry in the grant table. + */ +int gnttab_end_access(grant_ref_t ref) +{ + u16 flags, nflags; + + BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES); + + nflags = gnttab_table[ref].flags; + do { + if ((flags = nflags) & (GTF_reading | GTF_writing)) { + printf("WARNING: g.e. still in use! (%x)\n", flags); + return 0; + } + } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) != + flags); + + put_free_entry(ref); + return 1; +} + +grant_ref_t gnttab_alloc_and_grant(void **map) +{ + unsigned long mfn; + grant_ref_t gref; + + *map = (void *)memalign(PAGE_SIZE, PAGE_SIZE); + mfn = virt_to_mfn(*map); + gref = gnttab_grant_access(0, mfn, 0); + return gref; +} + +static const char * const gnttabop_error_msgs[] = GNTTABOP_error_msgs; + +const char *gnttabop_error(int16_t status) +{ + status = -status; + if (status < 0 || status >= ARRAY_SIZE(gnttabop_error_msgs)) + return "bad status"; + else + return gnttabop_error_msgs[status]; +} + +/* Get Xen's suggested physical page assignments for the grant table. */ +void get_gnttab_base(phys_addr_t *gnttab_base, phys_size_t *gnttab_sz) +{ + const void *blob = gd->fdt_blob; + struct fdt_resource res; + int mem; + + mem = fdt_node_offset_by_compatible(blob, -1, "xen,xen"); + if (mem < 0) { + printf("No xen,xen compatible found\n"); + BUG(); + } + + mem = fdt_get_resource(blob, mem, "reg", 0, &res); + if (mem == -FDT_ERR_NOTFOUND) { + printf("No grant table base in the device tree\n"); + BUG(); + } + + *gnttab_base = (phys_addr_t)res.start; + if (gnttab_sz) + *gnttab_sz = (phys_size_t)(res.end - res.start + 1); + + debug("FDT suggests grant table base at %llx\n", + *gnttab_base); +} + +void init_gnttab(void) +{ + struct xen_add_to_physmap xatp; + struct gnttab_setup_table setup; + xen_pfn_t frames[NR_GRANT_FRAMES]; + int i, rc; + + debug("%s\n", __func__); + + for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) + put_free_entry(i); + + get_gnttab_base((phys_addr_t *)&gnttab_table, NULL); + + for (i = 0; i < NR_GRANT_FRAMES; i++) { + xatp.domid = DOMID_SELF; + xatp.size = 0; + xatp.space = XENMAPSPACE_grant_table; + xatp.idx = i; + xatp.gpfn = PFN_DOWN((unsigned long)gnttab_table) + i; + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); + if (rc) + printf("XENMEM_add_to_physmap failed; status = %d\n", + rc); + BUG_ON(rc != 0); + } + + setup.dom = DOMID_SELF; + setup.nr_frames = NR_GRANT_FRAMES; + set_xen_guest_handle(setup.frame_list, frames); +} + +void fini_gnttab(void) +{ + struct xen_remove_from_physmap xrtp; + struct gnttab_setup_table setup; + int i, rc; + + debug("%s\n", __func__); + + for (i = 0; i < NR_GRANT_FRAMES; i++) { + xrtp.domid = DOMID_SELF; + xrtp.gpfn = PFN_DOWN((unsigned long)gnttab_table) + i; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrtp); + if (rc) + printf("XENMEM_remove_from_physmap failed; status = %d\n", + rc); + BUG_ON(rc != 0); + } + + setup.dom = DOMID_SELF; + setup.nr_frames = 0; +} + diff --git a/drivers/xen/hypervisor.c b/drivers/xen/hypervisor.c new file mode 100644 index 00000000000..178c206f5bf --- /dev/null +++ b/drivers/xen/hypervisor.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: MIT License +/* + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002-2003, K A Fraser + * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk,Intel Research Cambridge + * Copyright (c) 2020, EPAM Systems Inc. + */ +#include <common.h> +#include <cpu_func.h> +#include <log.h> +#include <memalign.h> + +#include <asm/io.h> +#include <asm/armv8/mmu.h> +#include <asm/xen/system.h> + +#include <linux/bug.h> + +#include <xen/hvm.h> +#include <xen/events.h> +#include <xen/gnttab.h> +#include <xen/xenbus.h> +#include <xen/interface/memory.h> + +#define active_evtchns(cpu, sh, idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) + +int in_callback; + +/* + * Shared page for communicating with the hypervisor. + * Events flags go here, for example. + */ +struct shared_info *HYPERVISOR_shared_info; + +static const char *param_name(int op) +{ +#define PARAM(x)[HVM_PARAM_##x] = #x + static const char *const names[] = { + PARAM(CALLBACK_IRQ), + PARAM(STORE_PFN), + PARAM(STORE_EVTCHN), + PARAM(PAE_ENABLED), + PARAM(IOREQ_PFN), + PARAM(VPT_ALIGN), + PARAM(CONSOLE_PFN), + PARAM(CONSOLE_EVTCHN), + }; +#undef PARAM + + if (op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} + +/** + * hvm_get_parameter_maintain_dcache - function to obtain a HVM + * parameter value. + * @idx: HVM parameter index + * @value: Value to fill in + * + * According to Xen on ARM ABI (xen/include/public/arch-arm.h): + * all memory which is shared with other entities in the system + * (including the hypervisor and other guests) must reside in memory + * which is mapped as Normal Inner Write-Back Outer Write-Back + * Inner-Shareable. + * + * Thus, page attributes must be equally set for all the entities + * working with that page. + * + * Before MMU setup the data cache is turned off, so it means that + * manual data cache maintenance is required, because of the + * difference of page attributes. + */ +int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value) +{ + struct xen_hvm_param xhv; + int ret; + + invalidate_dcache_range((unsigned long)&xhv, + (unsigned long)&xhv + sizeof(xhv)); + xhv.domid = DOMID_SELF; + xhv.index = idx; + invalidate_dcache_range((unsigned long)&xhv, + (unsigned long)&xhv + sizeof(xhv)); + + ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (ret < 0) { + pr_err("Cannot get hvm parameter %s (%d): %d!\n", + param_name(idx), idx, ret); + BUG(); + } + invalidate_dcache_range((unsigned long)&xhv, + (unsigned long)&xhv + sizeof(xhv)); + + *value = xhv.value; + + return ret; +} + +int hvm_get_parameter(int idx, uint64_t *value) +{ + struct xen_hvm_param xhv; + int ret; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (ret < 0) { + pr_err("Cannot get hvm parameter %s (%d): %d!\n", + param_name(idx), idx, ret); + BUG(); + } + + *value = xhv.value; + + return ret; +} + +struct shared_info *map_shared_info(void *p) +{ + struct xen_add_to_physmap xatp; + + HYPERVISOR_shared_info = (struct shared_info *)memalign(PAGE_SIZE, + PAGE_SIZE); + if (!HYPERVISOR_shared_info) + BUG(); + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0) + BUG(); + + return HYPERVISOR_shared_info; +} + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long l1, l2, l1i, l2i; + unsigned int port; + int cpu = 0; + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = &s->vcpu_info[cpu]; + + in_callback = 1; + + vcpu_info->evtchn_upcall_pending = 0; + l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + + while (l1 != 0) { + l1i = __ffs(l1); + l1 &= ~(1UL << l1i); + + while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { + l2i = __ffs(l2); + l2 &= ~(1UL << l2i); + + port = (l1i * (sizeof(unsigned long) * 8)) + l2i; + do_event(port, regs); + } + } + + in_callback = 0; +} + +void force_evtchn_callback(void) +{ +#ifdef XEN_HAVE_PV_UPCALL_MASK + int save; +#endif + struct vcpu_info *vcpu; + + vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; +#ifdef XEN_HAVE_PV_UPCALL_MASK + save = vcpu->evtchn_upcall_mask; +#endif + + while (vcpu->evtchn_upcall_pending) { +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = 1; +#endif + do_hypervisor_callback(NULL); +#ifdef XEN_HAVE_PV_UPCALL_MASK + vcpu->evtchn_upcall_mask = save; +#endif + }; +} + +void mask_evtchn(uint32_t port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + + synch_set_bit(port, &s->evtchn_mask[0]); +} + +void unmask_evtchn(uint32_t port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = &s->vcpu_info[smp_processor_id()]; + + synch_clear_bit(port, &s->evtchn_mask[0]); + + /* + * Just like a real IO-APIC we 'lose the interrupt edge' if the + * channel is masked. + */ + if (synch_test_bit(port, &s->evtchn_pending[0]) && + !synch_test_and_set_bit(port / (sizeof(unsigned long) * 8), + &vcpu_info->evtchn_pending_sel)) { + vcpu_info->evtchn_upcall_pending = 1; +#ifdef XEN_HAVE_PV_UPCALL_MASK + if (!vcpu_info->evtchn_upcall_mask) +#endif + force_evtchn_callback(); + } +} + +void clear_evtchn(uint32_t port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + + synch_clear_bit(port, &s->evtchn_pending[0]); +} + +void xen_init(void) +{ + debug("%s\n", __func__); + + map_shared_info(NULL); + init_events(); + init_xenbus(); + init_gnttab(); +} + +void xen_fini(void) +{ + debug("%s\n", __func__); + + fini_gnttab(); + fini_xenbus(); + fini_events(); +} diff --git a/drivers/xen/pvblock.c b/drivers/xen/pvblock.c new file mode 100644 index 00000000000..76e82fbf41d --- /dev/null +++ b/drivers/xen/pvblock.c @@ -0,0 +1,867 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * (C) 2007-2008 Samuel Thibault. + * (C) Copyright 2020 EPAM Systems Inc. + */ +#include <blk.h> +#include <common.h> +#include <dm.h> +#include <dm/device-internal.h> +#include <malloc.h> +#include <part.h> + +#include <asm/armv8/mmu.h> +#include <asm/io.h> +#include <asm/xen/system.h> + +#include <linux/bug.h> +#include <linux/compat.h> + +#include <xen/events.h> +#include <xen/gnttab.h> +#include <xen/hvm.h> +#include <xen/xenbus.h> + +#include <xen/interface/io/ring.h> +#include <xen/interface/io/blkif.h> +#include <xen/interface/io/protocols.h> + +#define DRV_NAME "pvblock" +#define DRV_NAME_BLK "pvblock_blk" + +#define O_RDONLY 00 +#define O_RDWR 02 +#define WAIT_RING_TO_MS 10 + +struct blkfront_info { + u64 sectors; + unsigned int sector_size; + int mode; + int info; + int barrier; + int flush; +}; + +/** + * struct blkfront_dev - Struct representing blkfront device + * @dom: Domain id + * @ring: Front_ring structure + * @ring_ref: The grant reference, allowing us to grant access + * to the ring to the other end/domain + * @evtchn: Event channel used to signal ring events + * @handle: Events handle + * @nodename: Device XenStore path in format "device/vbd/" + @devid + * @backend: Backend XenStore path + * @info: Private data + * @devid: Device id + */ +struct blkfront_dev { + domid_t dom; + + struct blkif_front_ring ring; + grant_ref_t ring_ref; + evtchn_port_t evtchn; + blkif_vdev_t handle; + + char *nodename; + char *backend; + struct blkfront_info info; + unsigned int devid; + u8 *bounce_buffer; +}; + +struct blkfront_platdata { + unsigned int devid; +}; + +/** + * struct blkfront_aiocb - AIO сontrol block + * @aio_dev: Blockfront device + * @aio_buf: Memory buffer, which must be sector-aligned for + * @aio_dev sector + * @aio_nbytes: Size of AIO, which must be less than @aio_dev + * sector-sized amounts + * @aio_offset: Offset, which must not go beyond @aio_dev + * sector-aligned location + * @data: Data used to receiving response from ring + * @gref: Array of grant references + * @n: Number of segments + * @aio_cb: Represents one I/O request. + */ +struct blkfront_aiocb { + struct blkfront_dev *aio_dev; + u8 *aio_buf; + size_t aio_nbytes; + off_t aio_offset; + void *data; + + grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int n; + + void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret); +}; + +static void blkfront_sync(struct blkfront_dev *dev); + +static void free_blkfront(struct blkfront_dev *dev) +{ + mask_evtchn(dev->evtchn); + free(dev->backend); + + gnttab_end_access(dev->ring_ref); + free(dev->ring.sring); + + unbind_evtchn(dev->evtchn); + + free(dev->bounce_buffer); + free(dev->nodename); + free(dev); +} + +static int init_blkfront(unsigned int devid, struct blkfront_dev *dev) +{ + xenbus_transaction_t xbt; + char *err = NULL; + char *message = NULL; + struct blkif_sring *s; + int retry = 0; + char *msg = NULL; + char *c; + char nodename[32]; + char path[ARRAY_SIZE(nodename) + strlen("/backend-id") + 1]; + + sprintf(nodename, "device/vbd/%d", devid); + + memset(dev, 0, sizeof(*dev)); + dev->nodename = strdup(nodename); + dev->devid = devid; + + snprintf(path, sizeof(path), "%s/backend-id", nodename); + dev->dom = xenbus_read_integer(path); + evtchn_alloc_unbound(dev->dom, NULL, dev, &dev->evtchn); + + s = (struct blkif_sring *)memalign(PAGE_SIZE, PAGE_SIZE); + if (!s) { + printf("Failed to allocate shared ring\n"); + goto error; + } + + SHARED_RING_INIT(s); + FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE); + + dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_pfn(s), 0); + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printf("starting transaction\n"); + free(err); + } + + err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + snprintf(path, sizeof(path), "%s/state", nodename); + err = xenbus_switch_state(xbt, path, XenbusStateConnected); + if (err) { + message = "switching state"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0, &retry); + free(err); + if (retry) { + goto again; + printf("completing transaction\n"); + } + + goto done; + +abort_transaction: + free(err); + err = xenbus_transaction_end(xbt, 1, &retry); + printf("Abort transaction %s\n", message); + goto error; + +done: + snprintf(path, sizeof(path), "%s/backend", nodename); + msg = xenbus_read(XBT_NIL, path, &dev->backend); + if (msg) { + printf("Error %s when reading the backend path %s\n", + msg, path); + goto error; + } + + dev->handle = strtoul(strrchr(nodename, '/') + 1, NULL, 0); + + { + XenbusState state; + char path[strlen(dev->backend) + + strlen("/feature-flush-cache") + 1]; + + snprintf(path, sizeof(path), "%s/mode", dev->backend); + msg = xenbus_read(XBT_NIL, path, &c); + if (msg) { + printf("Error %s when reading the mode\n", msg); + goto error; + } + if (*c == 'w') + dev->info.mode = O_RDWR; + else + dev->info.mode = O_RDONLY; + free(c); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + + msg = NULL; + state = xenbus_read_integer(path); + while (!msg && state < XenbusStateConnected) + msg = xenbus_wait_for_state_change(path, &state); + if (msg || state != XenbusStateConnected) { + printf("backend not available, state=%d\n", state); + goto error; + } + + snprintf(path, sizeof(path), "%s/info", dev->backend); + dev->info.info = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sectors", dev->backend); + /* + * FIXME: read_integer returns an int, so disk size + * limited to 1TB for now + */ + dev->info.sectors = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/sector-size", dev->backend); + dev->info.sector_size = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-barrier", + dev->backend); + dev->info.barrier = xenbus_read_integer(path); + + snprintf(path, sizeof(path), "%s/feature-flush-cache", + dev->backend); + dev->info.flush = xenbus_read_integer(path); + } + unmask_evtchn(dev->evtchn); + + dev->bounce_buffer = memalign(dev->info.sector_size, + dev->info.sector_size); + if (!dev->bounce_buffer) { + printf("Failed to allocate bouncing buffer\n"); + goto error; + } + + debug("%llu sectors of %u bytes, bounce buffer at %p\n", + dev->info.sectors, dev->info.sector_size, + dev->bounce_buffer); + + return 0; + +error: + free(msg); + free(err); + free_blkfront(dev); + return -ENODEV; +} + +static void shutdown_blkfront(struct blkfront_dev *dev) +{ + char *err = NULL, *err2; + XenbusState state; + + char path[strlen(dev->backend) + strlen("/state") + 1]; + char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1]; + + debug("Close " DRV_NAME ", device ID %d\n", dev->devid); + + blkfront_sync(dev); + + snprintf(path, sizeof(path), "%s/state", dev->backend); + snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, + XenbusStateClosing)) != NULL) { + printf("%s: error changing state to %d: %s\n", __func__, + XenbusStateClosing, err); + goto close; + } + + state = xenbus_read_integer(path); + while (!err && state < XenbusStateClosing) + err = xenbus_wait_for_state_change(path, &state); + free(err); + + if ((err = xenbus_switch_state(XBT_NIL, nodename, + XenbusStateClosed)) != NULL) { + printf("%s: error changing state to %d: %s\n", __func__, + XenbusStateClosed, err); + goto close; + } + + state = xenbus_read_integer(path); + while (state < XenbusStateClosed) { + err = xenbus_wait_for_state_change(path, &state); + free(err); + } + + if ((err = xenbus_switch_state(XBT_NIL, nodename, + XenbusStateInitialising)) != NULL) { + printf("%s: error changing state to %d: %s\n", __func__, + XenbusStateInitialising, err); + goto close; + } + + state = xenbus_read_integer(path); + while (!err && + (state < XenbusStateInitWait || state >= XenbusStateClosed)) + err = xenbus_wait_for_state_change(path, &state); + +close: + free(err); + + snprintf(nodename, sizeof(nodename), "%s/ring-ref", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename); + err2 = xenbus_rm(XBT_NIL, nodename); + free(err2); + + if (!err) + free_blkfront(dev); +} + +/** + * blkfront_aio_poll() - AIO polling function. + * @dev: Blkfront device + * + * Here we receive response from the ring and check its status. This happens + * until we read all data from the ring. We read the data from consumed pointer + * to the response pointer. Then increase consumed pointer to make it clear that + * the data has been read. + * + * Return: Number of consumed bytes. + */ +static int blkfront_aio_poll(struct blkfront_dev *dev) +{ + RING_IDX rp, cons; + struct blkif_response *rsp; + int more; + int nr_consumed; + +moretodo: + rp = dev->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + cons = dev->ring.rsp_cons; + + nr_consumed = 0; + while ((cons != rp)) { + struct blkfront_aiocb *aiocbp; + int status; + + rsp = RING_GET_RESPONSE(&dev->ring, cons); + nr_consumed++; + + aiocbp = (void *)(uintptr_t)rsp->id; + status = rsp->status; + + switch (rsp->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + { + int j; + + if (status != BLKIF_RSP_OKAY) + printf("%s error %d on %s at offset %llu, num bytes %llu\n", + rsp->operation == BLKIF_OP_READ ? + "read" : "write", + status, aiocbp->aio_dev->nodename, + (unsigned long long)aiocbp->aio_offset, + (unsigned long long)aiocbp->aio_nbytes); + + for (j = 0; j < aiocbp->n; j++) + gnttab_end_access(aiocbp->gref[j]); + + break; + } + + case BLKIF_OP_WRITE_BARRIER: + if (status != BLKIF_RSP_OKAY) + printf("write barrier error %d\n", status); + break; + case BLKIF_OP_FLUSH_DISKCACHE: + if (status != BLKIF_RSP_OKAY) + printf("flush error %d\n", status); + break; + + default: + printf("unrecognized block operation %d response (status %d)\n", + rsp->operation, status); + break; + } + + dev->ring.rsp_cons = ++cons; + /* Nota: callback frees aiocbp itself */ + if (aiocbp && aiocbp->aio_cb) + aiocbp->aio_cb(aiocbp, status ? -EIO : 0); + if (dev->ring.rsp_cons != cons) + /* We reentered, we must not continue here */ + break; + } + + RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more); + if (more) + goto moretodo; + + return nr_consumed; +} + +static void blkfront_wait_slot(struct blkfront_dev *dev) +{ + /* Wait for a slot */ + if (RING_FULL(&dev->ring)) { + while (true) { + blkfront_aio_poll(dev); + if (!RING_FULL(&dev->ring)) + break; + wait_event_timeout(NULL, !RING_FULL(&dev->ring), + WAIT_RING_TO_MS); + } + } +} + +/** + * blkfront_aio_poll() - Issue an aio. + * @aiocbp: AIO control block structure + * @write: Describes is it read or write operation + * 0 - read + * 1 - write + * + * We check whether the AIO parameters meet the requirements of the device. + * Then receive request from ring and define its arguments. After this we + * grant access to the grant references. The last step is notifying about AIO + * via event channel. + */ +static void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) +{ + struct blkfront_dev *dev = aiocbp->aio_dev; + struct blkif_request *req; + RING_IDX i; + int notify; + int n, j; + uintptr_t start, end; + + /* Can't io at non-sector-aligned location */ + BUG_ON(aiocbp->aio_offset & (dev->info.sector_size - 1)); + /* Can't io non-sector-sized amounts */ + BUG_ON(aiocbp->aio_nbytes & (dev->info.sector_size - 1)); + /* Can't io non-sector-aligned buffer */ + BUG_ON(((uintptr_t)aiocbp->aio_buf & (dev->info.sector_size - 1))); + + start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK; + end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + + PAGE_SIZE - 1) & PAGE_MASK; + n = (end - start) / PAGE_SIZE; + aiocbp->n = n; + + BUG_ON(n > BLKIF_MAX_SEGMENTS_PER_REQUEST); + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + + req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; + req->nr_segments = n; + req->handle = dev->handle; + req->id = (uintptr_t)aiocbp; + req->sector_number = aiocbp->aio_offset / dev->info.sector_size; + + for (j = 0; j < n; j++) { + req->seg[j].first_sect = 0; + req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1; + } + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / + dev->info.sector_size; + req->seg[n - 1].last_sect = (((uintptr_t)aiocbp->aio_buf + + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size; + for (j = 0; j < n; j++) { + uintptr_t data = start + j * PAGE_SIZE; + + if (!write) { + /* Trigger CoW if needed */ + *(char *)(data + (req->seg[j].first_sect * + dev->info.sector_size)) = 0; + barrier(); + } + req->seg[j].gref = gnttab_grant_access(dev->dom, + virt_to_pfn((void *)data), + write); + aiocbp->gref[j] = req->seg[j].gref; + } + + dev->ring.req_prod_pvt = i + 1; + + wmb(); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + + if (notify) + notify_remote_via_evtchn(dev->evtchn); +} + +static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) +{ + aiocbp->data = (void *)1; + aiocbp->aio_cb = NULL; +} + +static void blkfront_io(struct blkfront_aiocb *aiocbp, int write) +{ + aiocbp->aio_cb = blkfront_aio_cb; + blkfront_aio(aiocbp, write); + aiocbp->data = NULL; + + while (true) { + blkfront_aio_poll(aiocbp->aio_dev); + if (aiocbp->data) + break; + cpu_relax(); + } +} + +static void blkfront_push_operation(struct blkfront_dev *dev, u8 op, + uint64_t id) +{ + struct blkif_request *req; + int notify, i; + + blkfront_wait_slot(dev); + i = dev->ring.req_prod_pvt; + req = RING_GET_REQUEST(&dev->ring, i); + req->operation = op; + req->nr_segments = 0; + req->handle = dev->handle; + req->id = id; + req->sector_number = 0; + dev->ring.req_prod_pvt = i + 1; + wmb(); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify); + if (notify) + notify_remote_via_evtchn(dev->evtchn); +} + +static void blkfront_sync(struct blkfront_dev *dev) +{ + if (dev->info.mode == O_RDWR) { + if (dev->info.barrier == 1) + blkfront_push_operation(dev, + BLKIF_OP_WRITE_BARRIER, 0); + + if (dev->info.flush == 1) + blkfront_push_operation(dev, + BLKIF_OP_FLUSH_DISKCACHE, 0); + } + + while (true) { + blkfront_aio_poll(dev); + if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring)) + break; + cpu_relax(); + } +} + +/** + * pvblock_iop() - Issue an aio. + * @udev: Pvblock device + * @blknr: Block number to read from / write to + * @blkcnt: Amount of blocks to read / write + * @buffer: Memory buffer with data to be read / write + * @write: Describes is it read or write operation + * 0 - read + * 1 - write + * + * Depending on the operation - reading or writing, data is read / written from the + * specified address (@buffer) to the sector (@blknr). + */ +static ulong pvblock_iop(struct udevice *udev, lbaint_t blknr, + lbaint_t blkcnt, void *buffer, int write) +{ + struct blkfront_dev *blk_dev = dev_get_priv(udev); + struct blk_desc *desc = dev_get_uclass_platdata(udev); + struct blkfront_aiocb aiocb; + lbaint_t blocks_todo; + bool unaligned; + + if (blkcnt == 0) + return 0; + + if ((blknr + blkcnt) > desc->lba) { + printf(DRV_NAME ": block number 0x" LBAF " exceeds max(0x" LBAF ")\n", + blknr + blkcnt, desc->lba); + return 0; + } + + unaligned = (uintptr_t)buffer & (blk_dev->info.sector_size - 1); + + aiocb.aio_dev = blk_dev; + aiocb.aio_offset = blknr * desc->blksz; + aiocb.aio_cb = NULL; + aiocb.data = NULL; + blocks_todo = blkcnt; + do { + aiocb.aio_buf = unaligned ? blk_dev->bounce_buffer : buffer; + + if (write && unaligned) + memcpy(blk_dev->bounce_buffer, buffer, desc->blksz); + + aiocb.aio_nbytes = unaligned ? desc->blksz : + min((size_t)(BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE), + (size_t)(blocks_todo * desc->blksz)); + + blkfront_io(&aiocb, write); + + if (!write && unaligned) + memcpy(buffer, blk_dev->bounce_buffer, desc->blksz); + + aiocb.aio_offset += aiocb.aio_nbytes; + buffer += aiocb.aio_nbytes; + blocks_todo -= aiocb.aio_nbytes / desc->blksz; + } while (blocks_todo > 0); + + return blkcnt; +} + +ulong pvblock_blk_read(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt, + void *buffer) +{ + return pvblock_iop(udev, blknr, blkcnt, buffer, 0); +} + +ulong pvblock_blk_write(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt, + const void *buffer) +{ + return pvblock_iop(udev, blknr, blkcnt, (void *)buffer, 1); +} + +static int pvblock_blk_bind(struct udevice *udev) +{ + struct blk_desc *desc = dev_get_uclass_platdata(udev); + int devnum; + + desc->if_type = IF_TYPE_PVBLOCK; + /* + * Initialize the devnum to -ENODEV. This is to make sure that + * blk_next_free_devnum() works as expected, since the default + * value 0 is a valid devnum. + */ + desc->devnum = -ENODEV; + devnum = blk_next_free_devnum(IF_TYPE_PVBLOCK); + if (devnum < 0) + return devnum; + desc->devnum = devnum; + desc->part_type = PART_TYPE_UNKNOWN; + desc->bdev = udev; + + strncpy(desc->vendor, "Xen", sizeof(desc->vendor)); + strncpy(desc->revision, "1", sizeof(desc->revision)); + strncpy(desc->product, "Virtual disk", sizeof(desc->product)); + + return 0; +} + +static int pvblock_blk_probe(struct udevice *udev) +{ + struct blkfront_dev *blk_dev = dev_get_priv(udev); + struct blkfront_platdata *platdata = dev_get_platdata(udev); + struct blk_desc *desc = dev_get_uclass_platdata(udev); + int ret, devid; + + devid = platdata->devid; + free(platdata); + + ret = init_blkfront(devid, blk_dev); + if (ret < 0) + return ret; + + desc->blksz = blk_dev->info.sector_size; + desc->lba = blk_dev->info.sectors; + desc->log2blksz = LOG2(blk_dev->info.sector_size); + + return 0; +} + +static int pvblock_blk_remove(struct udevice *udev) +{ + struct blkfront_dev *blk_dev = dev_get_priv(udev); + + shutdown_blkfront(blk_dev); + return 0; +} + +static const struct blk_ops pvblock_blk_ops = { + .read = pvblock_blk_read, + .write = pvblock_blk_write, +}; + +U_BOOT_DRIVER(pvblock_blk) = { + .name = DRV_NAME_BLK, + .id = UCLASS_BLK, + .ops = &pvblock_blk_ops, + .bind = pvblock_blk_bind, + .probe = pvblock_blk_probe, + .remove = pvblock_blk_remove, + .priv_auto_alloc_size = sizeof(struct blkfront_dev), + .flags = DM_FLAG_OS_PREPARE, +}; + +/******************************************************************************* + * Para-virtual block device class + *******************************************************************************/ + +typedef int (*enum_vbd_callback)(struct udevice *parent, unsigned int devid); + +static int on_new_vbd(struct udevice *parent, unsigned int devid) +{ + struct driver_info info; + struct udevice *udev; + struct blkfront_platdata *platdata; + int ret; + + debug("New " DRV_NAME_BLK ", device ID %d\n", devid); + + platdata = malloc(sizeof(struct blkfront_platdata)); + if (!platdata) { + printf("Failed to allocate platform data\n"); + return -ENOMEM; + } + + platdata->devid = devid; + + info.name = DRV_NAME_BLK; + info.platdata = platdata; + + ret = device_bind_by_name(parent, false, &info, &udev); + if (ret < 0) { + printf("Failed to bind " DRV_NAME_BLK " to device with ID %d, ret: %d\n", + devid, ret); + free(platdata); + } + return ret; +} + +static int xenbus_enumerate_vbd(struct udevice *udev, enum_vbd_callback clb) +{ + char **dirs, *msg; + int i, ret; + + msg = xenbus_ls(XBT_NIL, "device/vbd", &dirs); + if (msg) { + printf("Failed to read device/vbd directory: %s\n", msg); + free(msg); + return -ENODEV; + } + + for (i = 0; dirs[i]; i++) { + int devid; + + sscanf(dirs[i], "%d", &devid); + ret = clb(udev, devid); + if (ret < 0) + goto fail; + + free(dirs[i]); + } + ret = 0; + +fail: + for (; dirs[i]; i++) + free(dirs[i]); + free(dirs); + return ret; +} + +static void print_pvblock_devices(void) +{ + struct udevice *udev; + bool first = true; + const char *class_name; + + class_name = uclass_get_name(UCLASS_PVBLOCK); + for (blk_first_device(IF_TYPE_PVBLOCK, &udev); udev; + blk_next_device(&udev), first = false) { + struct blk_desc *desc = dev_get_uclass_platdata(udev); + + if (!first) + puts(", "); + printf("%s: %d", class_name, desc->devnum); + } + printf("\n"); +} + +void pvblock_init(void) +{ + struct driver_info info; + struct udevice *udev; + struct uclass *uc; + int ret; + + /* + * At this point Xen drivers have already initialized, + * so we can instantiate the class driver and enumerate + * virtual block devices. + */ + info.name = DRV_NAME; + ret = device_bind_by_name(gd->dm_root, false, &info, &udev); + if (ret < 0) + printf("Failed to bind " DRV_NAME ", ret: %d\n", ret); + + /* Bootstrap virtual block devices class driver */ + ret = uclass_get(UCLASS_PVBLOCK, &uc); + if (ret) + return; + uclass_foreach_dev_probe(UCLASS_PVBLOCK, udev); + + print_pvblock_devices(); +} + +static int pvblock_probe(struct udevice *udev) +{ + struct uclass *uc; + int ret; + + if (xenbus_enumerate_vbd(udev, on_new_vbd) < 0) + return -ENODEV; + + ret = uclass_get(UCLASS_BLK, &uc); + if (ret) + return ret; + uclass_foreach_dev_probe(UCLASS_BLK, udev) { + if (_ret) + return _ret; + }; + return 0; +} + +U_BOOT_DRIVER(pvblock_drv) = { + .name = DRV_NAME, + .id = UCLASS_PVBLOCK, + .probe = pvblock_probe, +}; + +UCLASS_DRIVER(pvblock) = { + .name = DRV_NAME, + .id = UCLASS_PVBLOCK, +}; diff --git a/drivers/xen/xenbus.c b/drivers/xen/xenbus.c new file mode 100644 index 00000000000..177d144723c --- /dev/null +++ b/drivers/xen/xenbus.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * (C) 2006 - Cambridge University + * (C) 2020 - EPAM Systems Inc. + * + * File: xenbus.c [1] + * Author: Steven Smith (sos22@cam.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * Changes: John D. Ramsdell + * + * Date: Jun 2006, changes Aug 2006 + * + * Description: Minimal implementation of xenbus + * + * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary + */ + +#include <common.h> +#include <log.h> + +#include <asm/armv8/mmu.h> +#include <asm/io.h> +#include <asm/xen/system.h> + +#include <linux/bug.h> +#include <linux/compat.h> + +#include <xen/events.h> +#include <xen/hvm.h> +#include <xen/xenbus.h> + +#include <xen/interface/io/xs_wire.h> + +#define map_frame_virt(v) (v << PAGE_SHIFT) + +#define SCNd16 "d" + +/* Wait for reply time out, ms */ +#define WAIT_XENBUS_TO_MS 5000 +/* Polling time out, ms */ +#define WAIT_XENBUS_POLL_TO_MS 1 + +static struct xenstore_domain_interface *xenstore_buf; + +static char *errmsg(struct xsd_sockmsg *rep); + +u32 xenbus_evtchn; + +struct write_req { + const void *data; + unsigned int len; +}; + +static void memcpy_from_ring(const void *r, void *d, int off, int len) +{ + int c1, c2; + const char *ring = r; + char *dest = d; + + c1 = min(len, XENSTORE_RING_SIZE - off); + c2 = len - c1; + memcpy(dest, ring + off, c1); + memcpy(dest + c1, ring, c2); +} + +/** + * xenbus_get_reply() - Receive reply from xenbus + * @req_reply: reply message structure + * + * Wait for reply message event from the ring and copy received message + * to input xsd_sockmsg structure. Repeat until full reply is + * proceeded. + * + * Return: false - timeout + * true - reply is received + */ +static bool xenbus_get_reply(struct xsd_sockmsg **req_reply) +{ + struct xsd_sockmsg msg; + unsigned int prod = xenstore_buf->rsp_prod; + +again: + if (!wait_event_timeout(NULL, prod != xenstore_buf->rsp_prod, + WAIT_XENBUS_TO_MS)) { + printk("%s: wait_event timeout\n", __func__); + return false; + } + + prod = xenstore_buf->rsp_prod; + if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg)) + goto again; + + rmb(); + memcpy_from_ring(xenstore_buf->rsp, &msg, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + sizeof(msg)); + + if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg) + msg.len) + goto again; + + /* We do not support and expect any Xen bus wathes. */ + BUG_ON(msg.type == XS_WATCH_EVENT); + + *req_reply = malloc(sizeof(msg) + msg.len); + memcpy_from_ring(xenstore_buf->rsp, *req_reply, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + msg.len + sizeof(msg)); + mb(); + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + + wmb(); + notify_remote_via_evtchn(xenbus_evtchn); + return true; +} + +char *xenbus_switch_state(xenbus_transaction_t xbt, const char *path, + XenbusState state) +{ + char *current_state; + char *msg = NULL; + char *msg2 = NULL; + char value[2]; + XenbusState rs; + int xbt_flag = 0; + int retry = 0; + + do { + if (xbt == XBT_NIL) { + msg = xenbus_transaction_start(&xbt); + if (msg) + goto exit; + xbt_flag = 1; + } + + msg = xenbus_read(xbt, path, ¤t_state); + if (msg) + goto exit; + + rs = (XenbusState)(current_state[0] - '0'); + free(current_state); + if (rs == state) { + msg = NULL; + goto exit; + } + + snprintf(value, 2, "%d", state); + msg = xenbus_write(xbt, path, value); + +exit: + if (xbt_flag) { + msg2 = xenbus_transaction_end(xbt, 0, &retry); + xbt = XBT_NIL; + } + if (msg == NULL && msg2 != NULL) + msg = msg2; + else + free(msg2); + } while (retry); + + return msg; +} + +char *xenbus_wait_for_state_change(const char *path, XenbusState *state) +{ + for (;;) { + char *res, *msg; + XenbusState rs; + + msg = xenbus_read(XBT_NIL, path, &res); + if (msg) + return msg; + + rs = (XenbusState)(res[0] - 48); + free(res); + + if (rs == *state) { + wait_event_timeout(NULL, false, WAIT_XENBUS_POLL_TO_MS); + } else { + *state = rs; + break; + } + } + return NULL; +} + +/* Send data to xenbus. This can block. All of the requests are seen + * by xenbus as if sent atomically. The header is added + * automatically, using type %type, req_id %req_id, and trans_id + * %trans_id. + */ +static void xb_write(int type, int req_id, xenbus_transaction_t trans_id, + const struct write_req *req, int nr_reqs) +{ + XENSTORE_RING_IDX prod; + int r; + int len = 0; + const struct write_req *cur_req; + int req_off; + int total_off; + int this_chunk; + struct xsd_sockmsg m = { + .type = type, + .req_id = req_id, + .tx_id = trans_id + }; + struct write_req header_req = { + &m, + sizeof(m) + }; + + for (r = 0; r < nr_reqs; r++) + len += req[r].len; + m.len = len; + len += sizeof(m); + + cur_req = &header_req; + + BUG_ON(len > XENSTORE_RING_SIZE); + prod = xenstore_buf->req_prod; + /* We are running synchronously, so it is a bug if we do not + * have enough room to send a message: please note that a message + * can occupy multiple slots in the ring buffer. + */ + BUG_ON(prod + len - xenstore_buf->req_cons > XENSTORE_RING_SIZE); + + total_off = 0; + req_off = 0; + while (total_off < len) { + this_chunk = min(cur_req->len - req_off, + XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod)); + memcpy((char *)xenstore_buf->req + MASK_XENSTORE_IDX(prod), + (char *)cur_req->data + req_off, this_chunk); + prod += this_chunk; + req_off += this_chunk; + total_off += this_chunk; + if (req_off == cur_req->len) { + req_off = 0; + if (cur_req == &header_req) + cur_req = req; + else + cur_req++; + } + } + + BUG_ON(req_off != 0); + BUG_ON(total_off != len); + BUG_ON(prod > xenstore_buf->req_cons + XENSTORE_RING_SIZE); + + /* Remote must see entire message before updating indexes */ + wmb(); + + xenstore_buf->req_prod += len; + + /* Send evtchn to notify remote */ + notify_remote_via_evtchn(xenbus_evtchn); +} + +/* Send a message to xenbus, in the same fashion as xb_write, and + * block waiting for a reply. The reply is malloced and should be + * freed by the caller. + */ +struct xsd_sockmsg *xenbus_msg_reply(int type, + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs) +{ + struct xsd_sockmsg *rep; + + /* We do not use request identifier which is echoed in daemon's response. */ + xb_write(type, 0, trans, io, nr_reqs); + /* Now wait for the message to arrive. */ + if (!xenbus_get_reply(&rep)) + return NULL; + return rep; +} + +static char *errmsg(struct xsd_sockmsg *rep) +{ + char *res; + + if (!rep) { + char msg[] = "No reply"; + size_t len = strlen(msg) + 1; + + return memcpy(malloc(len), msg, len); + } + if (rep->type != XS_ERROR) + return NULL; + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + return res; +} + +/* List the contents of a directory. Returns a malloc()ed array of + * pointers to malloc()ed strings. The array is NULL terminated. May + * block. + */ +char *xenbus_ls(xenbus_transaction_t xbt, const char *pre, char ***contents) +{ + struct xsd_sockmsg *reply, *repmsg; + struct write_req req[] = { { pre, strlen(pre) + 1 } }; + int nr_elems, x, i; + char **res, *msg; + + repmsg = xenbus_msg_reply(XS_DIRECTORY, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(repmsg); + if (msg) { + *contents = NULL; + return msg; + } + reply = repmsg + 1; + for (x = nr_elems = 0; x < repmsg->len; x++) + nr_elems += (((char *)reply)[x] == 0); + res = malloc(sizeof(res[0]) * (nr_elems + 1)); + for (x = i = 0; i < nr_elems; i++) { + int l = strlen((char *)reply + x); + + res[i] = malloc(l + 1); + memcpy(res[i], (char *)reply + x, l + 1); + x += l + 1; + } + res[i] = NULL; + free(repmsg); + *contents = res; + return NULL; +} + +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + + rep = xenbus_msg_reply(XS_READ, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +char *xenbus_write(xenbus_transaction_t xbt, const char *path, + const char *value) +{ + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, strlen(value)}, + }; + struct xsd_sockmsg *rep; + char *msg; + + rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_rm(xenbus_transaction_t xbt, const char *path) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *msg; + + rep = xenbus_msg_reply(XS_RM, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value) +{ + struct write_req req[] = { {path, strlen(path) + 1} }; + struct xsd_sockmsg *rep; + char *res, *msg; + + rep = xenbus_msg_reply(XS_GET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) { + *value = NULL; + return msg; + } + res = malloc(rep->len + 1); + memcpy(res, rep + 1, rep->len); + res[rep->len] = 0; + free(rep); + *value = res; + return NULL; +} + +#define PERM_MAX_SIZE 32 +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, + domid_t dom, char perm) +{ + char value[PERM_MAX_SIZE]; + struct write_req req[] = { + {path, strlen(path) + 1}, + {value, 0}, + }; + struct xsd_sockmsg *rep; + char *msg; + + snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom); + req[1].len = strlen(value) + 1; + rep = xenbus_msg_reply(XS_SET_PERMS, xbt, req, ARRAY_SIZE(req)); + msg = errmsg(rep); + if (msg) + return msg; + free(rep); + return NULL; +} + +char *xenbus_transaction_start(xenbus_transaction_t *xbt) +{ + /* Xenstored becomes angry if you send a length 0 message, so just + * shove a nul terminator on the end + */ + struct write_req req = { "", 1}; + struct xsd_sockmsg *rep; + char *err; + + rep = xenbus_msg_reply(XS_TRANSACTION_START, 0, &req, 1); + err = errmsg(rep); + if (err) + return err; + sscanf((char *)(rep + 1), "%lu", xbt); + free(rep); + return NULL; +} + +char *xenbus_transaction_end(xenbus_transaction_t t, int abort, int *retry) +{ + struct xsd_sockmsg *rep; + struct write_req req; + char *err; + + *retry = 0; + + req.data = abort ? "F" : "T"; + req.len = 2; + rep = xenbus_msg_reply(XS_TRANSACTION_END, t, &req, 1); + err = errmsg(rep); + if (err) { + if (!strcmp(err, "EAGAIN")) { + *retry = 1; + free(err); + return NULL; + } else { + return err; + } + } + free(rep); + return NULL; +} + +int xenbus_read_integer(const char *path) +{ + char *res, *buf; + int t; + + res = xenbus_read(XBT_NIL, path, &buf); + if (res) { + printk("Failed to read %s.\n", path); + free(res); + return -1; + } + sscanf(buf, "%d", &t); + free(buf); + return t; +} + +int xenbus_read_uuid(const char *path, unsigned char uuid[16]) +{ + char *res, *buf; + + res = xenbus_read(XBT_NIL, path, &buf); + if (res) { + printk("Failed to read %s.\n", path); + free(res); + return 0; + } + if (strlen(buf) != ((2 * 16) + 4) /* 16 hex bytes and 4 hyphens */ + || sscanf(buf, + "%2hhx%2hhx%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx", + uuid, uuid + 1, uuid + 2, uuid + 3, + uuid + 4, uuid + 5, uuid + 6, uuid + 7, + uuid + 8, uuid + 9, uuid + 10, uuid + 11, + uuid + 12, uuid + 13, uuid + 14, uuid + 15) != 16) { + printk("Xenbus path %s value %s is not a uuid!\n", path, buf); + free(buf); + return 0; + } + free(buf); + return 1; +} + +char *xenbus_printf(xenbus_transaction_t xbt, + const char *node, const char *path, + const char *fmt, ...) +{ +#define BUFFER_SIZE 256 + char fullpath[BUFFER_SIZE]; + char val[BUFFER_SIZE]; + va_list args; + + BUG_ON(strlen(node) + strlen(path) + 1 >= BUFFER_SIZE); + sprintf(fullpath, "%s/%s", node, path); + va_start(args, fmt); + vsprintf(val, fmt, args); + va_end(args); + return xenbus_write(xbt, fullpath, val); +} + +domid_t xenbus_get_self_id(void) +{ + char *dom_id; + domid_t ret; + + BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id)); + sscanf(dom_id, "%"SCNd16, &ret); + + return ret; +} + +void init_xenbus(void) +{ + u64 v; + + debug("%s\n", __func__); + if (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v)) + BUG(); + xenbus_evtchn = v; + + if (hvm_get_parameter(HVM_PARAM_STORE_PFN, &v)) + BUG(); + xenstore_buf = (struct xenstore_domain_interface *)map_frame_virt(v); +} + +void fini_xenbus(void) +{ + debug("%s\n", __func__); +} diff --git a/include/blk.h b/include/blk.h index abcd4bedbbb..9ee10fb80e7 100644 --- a/include/blk.h +++ b/include/blk.h @@ -33,6 +33,7 @@ enum if_type { IF_TYPE_HOST, IF_TYPE_NVME, IF_TYPE_EFI, + IF_TYPE_PVBLOCK, IF_TYPE_VIRTIO, IF_TYPE_COUNT, /* Number of interface types */ diff --git a/include/configs/aspeed-common.h b/include/configs/aspeed-common.h index 1295a6cd19c..df0f5d2e76f 100644 --- a/include/configs/aspeed-common.h +++ b/include/configs/aspeed-common.h @@ -7,22 +7,24 @@ * (C) Copyright 2016 Google, Inc */ -#ifndef __AST_COMMON_CONFIG_H -#define __AST_COMMON_CONFIG_H +#ifndef _ASPEED_COMMON_CONFIG_H +#define _ASPEED_COMMON_CONFIG_H + +#include <asm/arch/platform.h> /* Misc CPU related */ #define CONFIG_CMDLINE_TAG #define CONFIG_SETUP_MEMORY_TAGS #define CONFIG_INITRD_TAG -#define CONFIG_SYS_SDRAM_BASE 0x80000000 +#define CONFIG_SYS_SDRAM_BASE ASPEED_DRAM_BASE #ifdef CONFIG_PRE_CON_BUF_SZ -#define CONFIG_SYS_INIT_RAM_ADDR (0x1e720000 + CONFIG_PRE_CON_BUF_SZ) -#define CONFIG_SYS_INIT_RAM_SIZE (36*1024 - CONFIG_PRE_CON_BUF_SZ) +#define CONFIG_SYS_INIT_RAM_ADDR (ASPEED_SRAM_BASE + CONFIG_PRE_CON_BUF_SZ) +#define CONFIG_SYS_INIT_RAM_SIZE (ASPEED_SRAM_SIZE - CONFIG_PRE_CON_BUF_SZ) #else -#define CONFIG_SYS_INIT_RAM_ADDR (0x1e720000) -#define CONFIG_SYS_INIT_RAM_SIZE (36*1024) +#define CONFIG_SYS_INIT_RAM_ADDR (ASPEED_SRAM_BASE) +#define CONFIG_SYS_INIT_RAM_SIZE (ASPEED_SRAM_SIZE) #endif #define SYS_INIT_RAM_END (CONFIG_SYS_INIT_RAM_ADDR \ @@ -45,8 +47,6 @@ * Miscellaneous configurable options */ -#define CONFIG_BOOTCOMMAND "bootm 20080000 20300000" - #define CONFIG_EXTRA_ENV_SETTINGS \ "verify=yes\0" \ "spi_dma=yes\0" \ diff --git a/include/configs/xenguest_arm64.h b/include/configs/xenguest_arm64.h new file mode 100644 index 00000000000..db3059a82c6 --- /dev/null +++ b/include/configs/xenguest_arm64.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ + * + * (C) Copyright 2020 EPAM Systemc Inc. + */ +#ifndef __XENGUEST_ARM64_H +#define __XENGUEST_ARM64_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif + +#define CONFIG_BOARD_EARLY_INIT_F + +#define CONFIG_EXTRA_ENV_SETTINGS + +#undef CONFIG_NR_DRAM_BANKS +#undef CONFIG_SYS_SDRAM_BASE + +#define CONFIG_NR_DRAM_BANKS 1 + +/* + * This can be any arbitrary address as we are using PIE, but + * please note, that CONFIG_SYS_TEXT_BASE must match the below. + */ +#define CONFIG_SYS_LOAD_ADDR 0x40000000 +#define CONFIG_LNX_KRNL_IMG_TEXT_OFFSET_BASE CONFIG_SYS_LOAD_ADDR + +/* Size of malloc() pool */ +#define CONFIG_SYS_MALLOC_LEN (32 * 1024 * 1024) + +/* Monitor Command Prompt */ +#define CONFIG_SYS_PROMPT_HUSH_PS2 "> " +#define CONFIG_SYS_CBSIZE 1024 +#define CONFIG_SYS_MAXARGS 64 +#define CONFIG_SYS_BARGSIZE CONFIG_SYS_CBSIZE +#define CONFIG_SYS_PBSIZE (CONFIG_SYS_CBSIZE + \ + sizeof(CONFIG_SYS_PROMPT) + 16) + +#define CONFIG_OF_SYSTEM_SETUP + +#define CONFIG_CMDLINE_TAG 1 +#define CONFIG_INITRD_TAG 1 + +#define CONFIG_CMD_RUN + +#undef CONFIG_EXTRA_ENV_SETTINGS +#define CONFIG_EXTRA_ENV_SETTINGS \ + "loadimage=ext4load pvblock 0 0x90000000 /boot/Image;\0" \ + "pvblockboot=run loadimage;" \ + "booti 0x90000000 - 0x88000000;\0" + +#endif /* __XENGUEST_ARM64_H */ diff --git a/include/dm/uclass-id.h b/include/dm/uclass-id.h index dbc14ec3429..4ec5fa6670a 100644 --- a/include/dm/uclass-id.h +++ b/include/dm/uclass-id.h @@ -123,6 +123,7 @@ enum uclass_id { UCLASS_W1, /* Dallas 1-Wire bus */ UCLASS_W1_EEPROM, /* one-wire EEPROMs */ UCLASS_WDT, /* Watchdog Timer driver */ + UCLASS_PVBLOCK, /* Xen virtual block device */ UCLASS_COUNT, UCLASS_INVALID = -1, diff --git a/include/linux/compat.h b/include/linux/compat.h index 712eeaef4ed..363b2b94255 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -1,12 +1,20 @@ #ifndef _LINUX_COMPAT_H_ #define _LINUX_COMPAT_H_ +#include <console.h> #include <log.h> #include <malloc.h> + +#include <asm/processor.h> + #include <linux/types.h> #include <linux/err.h> #include <linux/kernel.h> +#ifdef CONFIG_XEN +#include <xen/events.h> +#endif + struct unused {}; typedef struct unused unused_t; @@ -122,6 +130,52 @@ static inline void kmem_cache_destroy(struct kmem_cache *cachep) #define add_wait_queue(...) do { } while (0) #define remove_wait_queue(...) do { } while (0) +#ifndef CONFIG_XEN +#define eventchn_poll() +#endif + +#define __wait_event_timeout(condition, timeout, ret) \ +({ \ + ulong __ret = ret; /* explicit shadow */ \ + ulong start = get_timer(0); \ + for (;;) { \ + eventchn_poll(); \ + if (condition) { \ + __ret = 1; \ + break; \ + } \ + if ((get_timer(start) > timeout) || ctrlc()) { \ + __ret = 0; \ + break; \ + } \ + cpu_relax(); \ + } \ + __ret; \ +}) + +/** + * wait_event_timeout() - Wait until the event occurs before the timeout. + * @wr_head: The wait queue to wait on. + * @condition: Expression for the event to wait for. + * @timeout: Maximum waiting time. + * + * We wait until the @condition evaluates to %true (succeed) or + * %false (@timeout elapsed). + * + * Return: + * 0 - if the @condition evaluated to %false after the @timeout elapsed + * 1 - if the @condition evaluated to %true + */ +#define wait_event_timeout(wq_head, condition, timeout) \ +({ \ + ulong __ret; \ + if (condition) \ + __ret = 1; \ + else \ + __ret = __wait_event_timeout(condition, timeout, __ret);\ + __ret; \ +}) + #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) /* This is also defined in ARMv8's mmu.h */ diff --git a/include/pci.h b/include/pci.h index 281f3539168..2089db9f16f 100644 --- a/include/pci.h +++ b/include/pci.h @@ -1315,7 +1315,8 @@ struct udevice *pci_get_controller(struct udevice *dev); */ int pci_get_regions(struct udevice *dev, struct pci_region **iop, struct pci_region **memp, struct pci_region **prefp); - +int +pci_get_dma_regions(struct udevice *dev, struct pci_region *memp, int index); /** * dm_pci_write_bar32() - Write the address of a BAR * diff --git a/include/pvblock.h b/include/pvblock.h new file mode 100644 index 00000000000..1023a6ab3bd --- /dev/null +++ b/include/pvblock.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ + * + * (C) 2020 EPAM Systems Inc. + */ + +#ifndef _PVBLOCK_H +#define _PVBLOCK_H + +/** + * pvblock_init() - Initialize para-virtual block device class driver + * + * Bind PV block to UCLASS_ROOT device and probe all UCLASS_PVBLOCK + * virtual block devices. + */ +void pvblock_init(void); + +#endif /* _PVBLOCK_H */ diff --git a/include/vsprintf.h b/include/vsprintf.h index d9fb68add0c..2290083eba4 100644 --- a/include/vsprintf.h +++ b/include/vsprintf.h @@ -234,4 +234,12 @@ char *strmhz(char *buf, unsigned long hz); */ void str_to_upper(const char *in, char *out, size_t len); +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char *buf, const char *fmt, ...); + #endif diff --git a/include/xen.h b/include/xen.h new file mode 100644 index 00000000000..a952a2c84b8 --- /dev/null +++ b/include/xen.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * (C) 2020, EPAM Systems Inc. + */ +#ifndef __XEN_H__ +#define __XEN_H__ + +/** + * xen_init() - Xen initialization + * + * Map Xen memory pages, initialize event handler and xenbus, + * setup the grant table. + */ +void xen_init(void); + +/** + * xen_fini() - Board cleanup before Linux kernel start + * + * Unmap Xen memory pages the specified guest's pseudophysical + * address space and unbind all event channels. + */ +void xen_fini(void); + +#endif /* __XEN_H__ */ diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h new file mode 100644 index 00000000000..0dbc5876f35 --- /dev/null +++ b/include/xen/arm/interface.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Guest OS interface to ARM Xen. + * + * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012 + */ + +#ifndef _ASM_ARM_XEN_INTERFACE_H +#define _ASM_ARM_XEN_INTERFACE_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif + +#define uint64_aligned_t u64 __attribute__((aligned(8))) + +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { union { type * p; uint64_aligned_t q; }; } \ + __guest_handle_ ## name + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(u64 *)&(hnd) = 0; \ + (hnd).p = val; \ + } while (0) + +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + +#ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the interface with + * Xen so that we can have one ABI that works for 32 and 64 bit guests. + * Note that this means that the xen_pfn_t type may be capable of + * representing pfn's which the guest cannot represent in its own pfn + * type. However since pfn space is controlled by the guest this is + * fine since it simply wouldn't be able to create any sure pfns in + * the first place. + */ +typedef u64 xen_pfn_t; +#define PRI_xen_pfn "llx" +typedef u64 xen_ulong_t; +#define PRI_xen_ulong "llx" +typedef s64 xen_long_t; +#define PRI_xen_long "llx" +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(void); +DEFINE_GUEST_HANDLE(u64); +DEFINE_GUEST_HANDLE(u32); +DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 1 + +struct arch_vcpu_info { }; +struct arch_shared_info { }; + +/* TODO: Move pvclock definitions some place arch independent */ +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +/* It is OK to have a 12 bytes struct with no padding because it is packed */ +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; + u32 sec_hi; +} __attribute__((__packed__)); +#endif + +#endif /* _ASM_ARM_XEN_INTERFACE_H */ diff --git a/include/xen/events.h b/include/xen/events.h new file mode 100644 index 00000000000..82bd18b48c8 --- /dev/null +++ b/include/xen/events.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2005 - Grzegorz Milos - Intel Reseach Cambridge + * (C) 2020 - EPAM Systems Inc. + * + * File: events.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: Grzegorz Milos (gm281@cam.ac.uk) + * + * Date: Jul 2003, changes Jun 2005 + * + * Description: Deals with events on the event channels + */ +#ifndef _EVENTS_H_ +#define _EVENTS_H_ + +#include <asm/xen/hypercall.h> +#include <xen/interface/event_channel.h> + +void init_events(void); +void fini_events(void); + +int do_event(evtchn_port_t port, struct pt_regs *regs); +void unbind_evtchn(evtchn_port_t port); +void unbind_all_ports(void); +int evtchn_alloc_unbound(domid_t pal, + void (*handler)(evtchn_port_t, struct pt_regs *, void *), + void *data, evtchn_port_t *port); + +/* Send notification via event channel */ +static inline int notify_remote_via_evtchn(evtchn_port_t port) +{ + struct evtchn_send op; + + op.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op); +} + +void eventchn_poll(void); + +#endif /* _EVENTS_H_ */ diff --git a/include/xen/gnttab.h b/include/xen/gnttab.h new file mode 100644 index 00000000000..db1d5361fed --- /dev/null +++ b/include/xen/gnttab.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * (C) 2006, Steven Smith <sos22@cam.ac.uk> + * (C) 2006, Grzegorz Milos <gm281@cam.ac.uk> + * (C) 2020, EPAM Systems Inc. + */ +#ifndef __GNTTAB_H__ +#define __GNTTAB_H__ + +#include <xen/interface/grant_table.h> + +void init_gnttab(void); +void fini_gnttab(void); + +grant_ref_t gnttab_alloc_and_grant(void **map); +grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, + int readonly); +int gnttab_end_access(grant_ref_t ref); +const char *gnttabop_error(int16_t status); + +void get_gnttab_base(phys_addr_t *gnttab_base, phys_size_t *gnttab_sz); + +#endif /* !__GNTTAB_H__ */ diff --git a/include/xen/hvm.h b/include/xen/hvm.h new file mode 100644 index 00000000000..f02c0798a6f --- /dev/null +++ b/include/xen/hvm.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Simple wrappers around HVM functions + * + * Copyright (c) 2002-2003, K A Fraser + * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk,Intel Research Cambridge + * Copyright (c) 2020, EPAM Systems Inc. + */ +#ifndef XEN_HVM_H__ +#define XEN_HVM_H__ + +#include <asm/xen/hypercall.h> +#include <xen/interface/hvm/params.h> +#include <xen/interface/xen.h> + +extern struct shared_info *HYPERVISOR_shared_info; + +int hvm_get_parameter(int idx, uint64_t *value); +int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value); + +struct shared_info *map_shared_info(void *p); +void do_hypervisor_callback(struct pt_regs *regs); +void mask_evtchn(uint32_t port); +void unmask_evtchn(uint32_t port); +void clear_evtchn(uint32_t port); + +#endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h new file mode 100644 index 00000000000..b0e35987591 --- /dev/null +++ b/include/xen/interface/event_channel.h @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * event_channel.h + * + * Event channels between domains. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include <xen/interface/xen.h> + +typedef u32 evtchn_port_t; +DEFINE_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <rdom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified + * vcpu. + * NOTES: + * 1. A virtual IRQ may be bound to at most one event channel per vcpu. + * 2. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + u32 virq; + u32 vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + u32 pirq; +#define BIND_PIRQ__WILL_SHARE 1 + u32 flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + u32 vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + u32 status; + u32 vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + u32 pirq; /* EVTCHNSTAT_pirq */ + u32 virq; /* EVTCHNSTAT_virq */ + } u; +}; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised + * the binding. This binding cannot be changed. + * 2. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + u32 vcpu; +}; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + */ +#define EVTCHNOP_init_control 11 +struct evtchn_init_control { + /* IN parameters. */ + u64 control_gfn; + u32 offset; + u32 vcpu; + /* OUT parameters. */ + u8 link_bits; + u8 _pad[7]; +}; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +#define EVTCHNOP_expand_array 12 +struct evtchn_expand_array { + /* IN parameters. */ + u64 array_gfn; +}; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +#define EVTCHNOP_set_priority 13 +struct evtchn_set_priority { + /* IN parameters. */ + evtchn_port_t port; + u32 priority; +}; + +struct evtchn_op { + u32 cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; + +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). */ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef u32 event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + u32 ready; + u32 _rsvd; + event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h new file mode 100644 index 00000000000..4acd4bd193b --- /dev/null +++ b/include/xen/interface/grant_table.h @@ -0,0 +1,565 @@ +/* SPDX-License-Identifier: MIT + * + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include <xen/interface/xen.h> + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef u32 grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + u16 flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + u32 frame; +}; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U << 0) +#define GTF_permit_access (1U << 0) +#define GTF_accept_transfer (2U << 0) +#define GTF_transitive (3U << 0) +#define GTF_type_mask (3U << 0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U << _GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U << _GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U << _GTF_writing) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U << _GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U << _GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U << _GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ + +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + u16 flags; + domid_t domid; +}; + +/* + * Version 2 of the grant entry structure, here is a union because three + * different types are suppotted: full_page, sub_page and transitive. + */ +union grant_entry_v2 { + struct grant_entry_header hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + struct grant_entry_header hdr; + u32 pad0; + u64 frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + struct grant_entry_header hdr; + u16 page_off; + u16 length; + u64 frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + */ + struct { + struct grant_entry_header hdr; + domid_t trans_domid; + u16 pad0; + grant_ref_t gref; + } transitive; + + u32 __spacer[4]; /* Pad to a power of two */ +}; + +typedef u16 grant_status_t; + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef u32 grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +#define GNTTABOP_map_grant_ref 0 +struct gnttab_map_grant_ref { + /* IN parameters. */ + u64 host_addr; + u32 flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + s16 status; /* GNTST_* */ + grant_handle_t handle; + u64 dev_bus_addr; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle> + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_grant_ref 1 +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + u64 host_addr; + u64 dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + s16 status; /* GNTST_* */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +#define GNTTABOP_setup_table 2 +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + u32 nr_frames; + /* OUT parameters. */ + s16 status; /* GNTST_* */ + + GUEST_HANDLE(xen_pfn_t)frame_list; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +#define GNTTABOP_dump_table 3 +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + s16 status; /* GNTST_* */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); + +/* + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <domid, ref>. + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +#define GNTTABOP_transfer 4 +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + s16 status; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be eithers MFNs or, for foreign domains, + * grant references. the foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * grant appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1 << _GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1 << _GNTCOPY_dest_gref) + +#define GNTTABOP_copy 5 +struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + u16 offset; + } source, dest; + u16 len; + u16 flags; /* GNTCOPY_* */ + /* OUT parameters. */ + s16 status; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + u32 nr_frames; + u32 max_nr_frames; + s16 status; /* GNTST_* */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by <handle> but atomically replace the page table entry with one + * pointing to the machine address under <new_addr>. <new_addr> will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace 7 +struct gnttab_unmap_and_replace { + /* IN parameters. */ + u64 host_addr; + u64 new_addr; + grant_handle_t handle; + /* OUT parameters. */ + s16 status; /* GNTST_* */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); + +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN parameters */ + u32 version; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * <nr_frames> specify the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + u32 nr_frames; + domid_t dom; + /* OUT parameters. */ + s16 status; /* GNTST_* */ + + GUEST_HANDLE(u64)frame_list; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain <dom>. + */ +#define GNTTABOP_get_version 10 +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + u16 pad; + /* OUT parameters */ + u32 version; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); + +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +#define GNTTABOP_cache_flush 12 +struct gnttab_cache_flush { + union { + u64 dev_bus_addr; + grant_ref_t ref; + } a; + u16 offset; /* offset from start of grant */ + u16 length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1 << 0) +#define GNTTAB_CACHE_INVAL (1 << 1) +#define GNTTAB_CACHE_SOURCE_GREF (1 << 31) + u32 op; +}; + +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); + +/* + * Bitfield values for update_pin_status.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1 << _GNTMAP_device_map) +/* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1 << _GNTMAP_host_map) +/* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1 << _GNTMAP_readonly) +/* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1 << _GNTMAP_application_map) + +/* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine addess of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1 << _GNTMAP_contains_pte) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0) + +/* + * Values for error status returns. All errors are -ve. + */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h new file mode 100644 index 00000000000..605d943fb1b --- /dev/null +++ b/include/xen/interface/hvm/hvm_op.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT + * + * hvm_op.h + * + * Copyright (c) 2007, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +/* Get/set subcommands: the second argument of the hypercall is a + * pointer to a xen_hvm_param struct. + */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + u32 index; /* IN */ + u64 value; /* IN/OUT */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + /* guest physical address of the toplevel pagetable dying */ + aligned_u64 gpa; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying); + +enum hvmmem_type_t { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +}; + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + u16 mem_type; + u16 pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + u64 pfn; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h new file mode 100644 index 00000000000..a81bb5e7c7a --- /dev/null +++ b/include/xen/interface/hvm/params.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: MIT + * + * params.h + * + * HVM parameters. HVM (Hardware Virtual Machine) is the type of instance + * that mimics bare-metal server setup which provides better hardware + * isolation. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include <xen/interface/hvm/hvm_op.h> + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +#define HVM_PARAM_CALLBACK_IRQ 0 +/* + * How should CPU0 event-channel notifications be delivered? + * + * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: + */ + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#if defined(__i386__) || defined(__x86_64__) +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. + */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#endif + +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +#define HVM_NR_PARAMS 19 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h new file mode 100644 index 00000000000..38b4d7c73bb --- /dev/null +++ b/include/xen/interface/io/blkif.h @@ -0,0 +1,701 @@ +/* SPDX-License-Identifier: MIT + * + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Copyright (c) 2003-2004, Keir Fraser + * Copyright (c) 2012, Spectra Logic Corporation + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t u16 +#endif +#define blkif_sector_t u64 + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen block driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * All data in the XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + * XenStore nodes marked "DEPRECATED" in their notes section should only be + * used to provide interoperability with legacy implementations. + * + * See the XenBus state transition diagram below for details on when XenBus + * nodes must be published and when they can be queried. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * mode + * Values: "r" (read only), "w" (writable) + * + * The read or write access permissions to the backing store to be + * granted to the frontend. + * + * params + * Values: string + * + * A free formatted string providing sufficient information for the + * hotplug script to attach the device and provide a suitable + * handler (ie: a block device) for blkback to use. + * + * physical-device + * Values: "MAJOR:MINOR" + * Notes: 11 + * + * MAJOR and MINOR are the major number and minor number of the + * backing device respectively. + * + * physical-device-path + * Values: path string + * + * A string that contains the absolute path to the disk image. On + * NetBSD and Linux this is always a block device, while on FreeBSD + * it can be either a block device or a regular file. + * + * type + * Values: "file", "phy", "tap" + * + * The type of the backing device/object. + * + * + * direct-io-safe + * Values: 0/1 (boolean) + * Default Value: 0 + * + * The underlying storage is not affected by the direct IO memory + * lifetime bug. See: + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html + * + * Therefore this option gives the backend permission to use + * O_DIRECT, notwithstanding that bug. + * + * That is, if this option is enabled, use of O_DIRECT is safe, + * in circumstances where we would normally have avoided it as a + * workaround for that bug. This option is not relevant for all + * backends, and even not necessarily supported for those for + * which it is relevant. A backend which knows that it is not + * affected by the bug can ignore this option. + * + * This option doesn't require a backend to use O_DIRECT, so it + * should not be used to try to control the caching behaviour. + * + *--------------------------------- Features --------------------------------- + * + * feature-barrier + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-flush-cache + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-discard + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_DISCARD request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * + *----------------------- Request Transport Parameters ------------------------ + * + * max-ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Notes: 1, 3 + * + * The maximum supported size of the request ring buffer in units of + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * max-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Notes: DEPRECATED, 2, 3 + * + * The maximum supported size of the request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *------------------------- Backend Device Properties ------------------------- + * + * discard-enable + * Values: 0/1 (boolean) + * Default Value: 1 + * + * This optional property, set by the toolstack, instructs the backend + * to offer (or not to offer) discard to the frontend. If the property + * is missing the backend should offer discard if the backing storage + * actually supports it. + * + * discard-alignment + * Values: <uint32_t> + * Default Value: 0 + * Notes: 4, 5 + * + * The offset, in bytes from the beginning of the virtual block device, + * to the first, addressable, discard extent on the underlying device. + * + * discard-granularity + * Values: <uint32_t> + * Default Value: <"sector-size"> + * Notes: 4 + * + * The size, in bytes, of the individually addressable discard extents + * of the underlying device. + * + * discard-secure + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 10 + * + * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD + * requests with the BLKIF_DISCARD_SECURE flag set. + * + * info + * Values: <uint32_t> (bitmap) + * + * A collection of bit flags describing attributes of the backing + * device. The VDISK_* macros define the meaning of each bit + * location. + * + * sector-size + * Values: <uint32_t> + * + * The logical block size, in bytes, of the underlying storage. This + * must be a power of two with a minimum value of 512. + * + * NOTE: Because of implementation bugs in some frontends this must be + * set to 512, unless the frontend advertizes a non-zero value + * in its "feature-large-sector-size" xenbus node. (See below). + * + * physical-sector-size + * Values: <uint32_t> + * Default Value: <"sector-size"> + * + * The physical block size, in bytes, of the backend storage. This + * must be an integer multiple of "sector-size". + * + * sectors + * Values: <u64> + * + * The size of the backend device, expressed in units of "sector-size". + * The product of "sector-size" and "sectors" must also be an integer + * multiple of "physical-sector-size", if that node is present. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: <uint32_t> + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: <uint32_t> + * Notes: 6 + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * ring-ref%u + * Values: <uint32_t> + * Notes: 6 + * + * For a frontend providing a multi-page ring, a "number of ring pages" + * sized list of nodes, each containing a Xen grant reference granting + * permission for the backend to map the page of the ring located + * at page index "%u". Page indexes are zero based. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) + * Notes: 1, 3 + * + * The size of the frontend allocated request ring buffer in units + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * num-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) + * Notes: DEPRECATED, 2, 3 + * + * The size of the frontend allocated request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *--------------------------------- Features --------------------------------- + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. This will cause a performance degradation, + * since the the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + * + * feature-large-sector-size + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the frontend will correctly supply and + * interpret all sector-based quantities in terms of the "sector-size" + * value supplied in the backend info, whatever that may be set to. + * If this node is not present or its value is "0" then it is assumed + * that the frontend requires that the logical block size is 512 as it + * is hardcoded (which is the case in some frontend implementations). + * + *------------------------- Virtual Device Properties ------------------------- + * + * device-type + * Values: "disk", "cdrom", "floppy", etc. + * + * virtual-device + * Values: <uint32_t> + * + * A value indicating the physical device to virtualize within the + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI + * disk", etc.) + * + * See docs/misc/vbd-interface.txt for details on the format of this + * value. + * + * Notes + * ----- + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer + * PV drivers. + * (2) Multi-page ring buffer scheme first used in some RedHat distributions + * including a distribution deployed on certain nodes of the Amazon + * EC2 cluster. + * (3) Support for multi-page ring buffers was implemented independently, + * in slightly different forms, by both Citrix and RedHat/Amazon. + * For full interoperability, block front and backends should publish + * identical ring parameters, adjusted for unit differences, to the + * XenStore nodes used in both schemes. + * (4) Devices that support discard functionality may internally allocate space + * (discardable extents) in units that are larger than the exported logical + * block size. If the backing device has such discardable extents the + * backend should provide both discard-granularity and discard-alignment. + * Providing just one of the two may be considered an error by the frontend. + * Backends supporting discard should include discard-granularity and + * discard-alignment even if it supports discarding individual sectors. + * Frontends should assume discard-alignment == 0 and discard-granularity + * == sector size if these keys are missing. + * (5) The discard-alignment parameter allows a physical device to be + * partitioned into virtual devices that do not necessarily begin or + * end on a discardable extent boundary. + * (6) When there is only a single page allocated to the request ring, + * 'ring-ref' is used to communicate the grant reference for this + * page to the backend. When using a multi-page ring, the 'ring-ref' + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. + *(11) Only used by Linux and NetBSD. + */ + +/* + * Multiple hardware queues/rings: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vbd, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues" with the number they wish to use, which must be + * greater than zero, and no more than the value reported by the backend in + * "multi-queue-max-queues". + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel and ring-ref keys, instead writing those keys under sub-keys + * having the name "queue-N" where N is the integer ID of the queue/ring for + * which those keys belong. Queues are indexed from zero. + * For example, a frontend with two queues must write the following set of + * queue-related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + * It is also possible to use multiple queues/rings together with + * feature multi-page ring buffer. + * For example, a frontend requests two queues/rings and the size of each ring + * buffer is two pages must write the following set of related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/ring-page-order = "1" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. If + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the + * discarded region on the device must be rendered unrecoverable before the + * command returns. + * + * This operation is analogous to performing a trim (ATA) or unamp (SCSI), + * command on a native device. + * + * More information about trim/unmap operations can be found at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + * + * Optional. See "feature-discard", "discard-alignment", + * "discard-granularity", and "discard-secure" in the XenBus node + * documentation above. + */ +#define BLKIF_OP_DISCARD 5 + +/* + * Recognized if "feature-max-indirect-segments" in present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +/* + * Maximum number of indirect pages to use per request. + */ +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +/* + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as + * 'sector_number' in blkif_request, blkif_request_discard and + * blkif_request_indirect are sector-based quantities. See the description + * of the "feature-large-sector-size" frontend xenbus node above for + * more information. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + u8 first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + u8 operation; /* BLKIF_OP_??? */ + u8 nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + u8 operation; /* BLKIF_OP_DISCARD */ + u8 flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1 << 0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + u64 nr_sectors; /* number of contiguous sectors to discard*/ +}; + +struct blkif_request_indirect { + u8 operation; /* BLKIF_OP_INDIRECT */ + u8 indirect_op; /* BLKIF_OP_{READ/WRITE} */ + u16 nr_segments; /* number of segments */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + u64 pad; /* Make it 64 byte aligned on i386 */ +#endif +}; + +struct blkif_response { + u64 id; /* copied from request */ + u8 operation; /* copied from request */ + s16 status; /* BLKIF_RSP_??? */ +}; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h new file mode 100644 index 00000000000..d4dccc74afa --- /dev/null +++ b/include/xen/interface/io/console.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT + * + * console.h + * + * Console I/O interface for Xen guest OSes. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef u32 XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#ifdef XEN_WANT_FLEX_CONSOLE_RING +#include "ring.h" +DEFINE_XEN_FLEX_RING(xencons); +#endif + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h new file mode 100644 index 00000000000..5aa0aaa93be --- /dev/null +++ b/include/xen/interface/io/protocols.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT + * + * protocols.h + * + * Copyright (c) 2008, Keir Fraser + * + * Xen protocols, which are used as ABI rules governing the format of all + * ring request and response structures. + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h new file mode 100644 index 00000000000..3c5c87deda9 --- /dev/null +++ b/include/xen/interface/io/ring.h @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: MIT + * + * ring.h + * + * Shared producer-consumer ring macros. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (u8, u16, etc) + * They are provided by stdint.h of the standard headers. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + +#include <xen/interface/grant_table.h> + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x) >> 2) << 2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x) >> 4) << 4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x) >> 8) << 8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x) >> 16) << 16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + u8 smartpoll_active; \ + } netif; \ + struct { \ + u8 msg; \ + } tapif_user; \ + u8 pvt_pad[4]; \ + } pvt; \ + u8 __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while (0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +/* + * Get a local copy of a request. + * + * Use this in preference to RING_GET_REQUEST() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this + * to be ineffective where _req is a struct which consists of only bitfields. + */ +#define RING_COPY_REQUEST(_r, _idx, _req) do { \ + /* Use volatile to force the copy into _req. */ \ + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +} while (0) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) \ + break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) \ + break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +/* + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and + * functions to check if there is data on the ring, and to read and + * write to them. + * + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but + * does not define the indexes page. As different protocols can have + * extensions to the basic format, this macro allow them to define their + * own struct. + * + * XEN_FLEX_RING_SIZE + * Convenience macro to calculate the size of one of the two rings + * from the overall order. + * + * $NAME_mask + * Function to apply the size mask to an index, to reduce the index + * within the range [0-size]. + * + * $NAME_read_packet + * Function to read data from the ring. The amount of data to read is + * specified by the "size" argument. + * + * $NAME_write_packet + * Function to write data to the ring. The amount of data to write is + * specified by the "size" argument. + * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + u8 pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + u8 pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h new file mode 100644 index 00000000000..946d46ddb1b --- /dev/null +++ b/include/xen/interface/io/xenbus.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT + * + * xenbus.h + * + * Xenbus protocol details. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; + +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h new file mode 100644 index 00000000000..f20d9c51226 --- /dev/null +++ b/include/xen/interface/io/xs_wire.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: MIT + * + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type { + XS_CONTROL, +#define XS_DEBUG XS_CONTROL + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + /* XS_RESTRICT has been removed */ + XS_RESET_WATCHES = XS_SET_TARGET + 2, + XS_DIRECTORY_PART, + + XS_TYPE_COUNT, /* Number of valid types. */ + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors { + int errnum; + const char *errstring; +}; + +#ifdef EINVAL +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) +}; +#endif + +struct xsd_sockmsg { + u32 type; /* XS_??? */ + u32 req_id;/* Request identifier, echoed in daemon's response. */ + u32 tx_id; /* Transaction id (0 if not related to a transaction). */ + u32 len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type { + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. + */ +#define XENSTORE_RING_SIZE 1024 +typedef u32 XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; + u32 server_features; /* Bitmap of features supported by the server */ + u32 connection; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. */ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + +/* The ability to reconnect a ring */ +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 + +/* Valid values for the connection field */ +#define XENSTORE_CONNECTED 0 /* the steady-state */ +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */ + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h new file mode 100644 index 00000000000..59a95dbc738 --- /dev/null +++ b/include/xen/interface/memory.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * memory.h + * + * Memory reservation and information. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +/* + * Increase or decrease the specified domain's memory reservation. Returns a + * -ve errcode on failure, or the # extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 +struct xen_memory_reservation { + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + */ + GUEST_HANDLE(xen_pfn_t)extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + xen_ulong_t nr_extents; + unsigned int extent_order; + + /* + * Maximum # bits addressable by the user of the allocated region (e.g., + * I/O devices often have a 32-bit limitation even in 64-bit systems). If + * zero then the user has no addressing restriction. + * This field is not used by XENMEM_decrease_reservation. + */ + unsigned int address_bits; + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; + +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); + +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. + */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. + */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + GUEST_HANDLE(xen_pfn_t)extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than max_e * 2MB. + */ + unsigned int nr_extents; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, + * XENMEM_add_to_physmap_range only. + */ +#define XENMAPSPACE_dev_mmio 5 /* device mmio region */ + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Number of pages to go through for gmfn_range */ + u16 size; + + /* Source mapping space. */ + unsigned int space; + + /* Index into source mapping space. */ + xen_ulong_t idx; + + /* GPFN where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +#define XENMEM_add_to_physmap_range 23 +struct xen_add_to_physmap_range { + /* IN */ + /* Which domain to change the mapping for. */ + domid_t domid; + u16 space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + u16 size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + GUEST_HANDLE(xen_ulong_t)idxs; + + /* GPFN in domid where the source mapping page should appear. */ + GUEST_HANDLE(xen_pfn_t)gpfns; + + /* OUT */ + + /* Per index error code. */ + GUEST_HANDLE(int)errs; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range); + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + GUEST_HANDLE(void)buffer; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); + +/* + * Get the pages for a particular guest resource, so that they can be + * mapped directly by a tools domain. + */ +#define XENMEM_acquire_resource 28 +struct xen_mem_acquire_resource { + /* IN - The domain whose resource is to be mapped */ + domid_t domid; + /* IN - the type of resource */ + u16 type; + +#define XENMEM_resource_ioreq_server 0 +#define XENMEM_resource_grant_table 1 + + /* + * IN - a type-specific resource identifier, which must be zero + * unless stated otherwise. + * + * type == XENMEM_resource_ioreq_server -> id == ioreq server id + * type == XENMEM_resource_grant_table -> id defined below + */ + u32 id; + +#define XENMEM_resource_grant_table_id_shared 0 +#define XENMEM_resource_grant_table_id_status 1 + + /* IN/OUT - As an IN parameter number of frames of the resource + * to be mapped. However, if the specified value is 0 and + * frame_list is NULL then this field will be set to the + * maximum value supported by the implementation on return. + */ + u32 nr_frames; + /* + * OUT - Must be zero on entry. On return this may contain a bitwise + * OR of the following values. + */ + u32 flags; + + /* The resource pages have been assigned to the calling domain */ +#define _XENMEM_rsrc_acq_caller_owned 0 +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned) + + /* + * IN - the index of the initial frame to be mapped. This parameter + * is ignored if nr_frames is 0. + */ + u64 frame; + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + + /* + * IN/OUT - If the tools domain is PV then, upon return, frame_list + * will be populated with the MFNs of the resource. + * If the tools domain is HVM then it is expected that, on + * entry, frame_list will be populated with a list of GFNs + * that will be mapped to the MFNs of the resource. + * If -EIO is returned then the frame_list has only been + * partially mapped and it is up to the caller to unmap all + * the GFNs. + * This parameter may be NULL if nr_frames is 0. + */ + GUEST_HANDLE(xen_pfn_t)frame_list; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h new file mode 100644 index 00000000000..387589be495 --- /dev/null +++ b/include/xen/interface/sched.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: MIT + * + * sched.h + * + * Scheduler state interactions + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include <xen/interface/event_channel.h> + +/* + * Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN + * of the guest's start info page. RDX/EDX is the third hypercall + * argument. + * + * In addition, which reason is SHUTDOWN_suspend this hypercall + * returns 1 if suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. + */ +#define SCHEDOP_shutdown 2 + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == sched_shutdown, as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 + +/* + * Override the current vcpu affinity by pinning it to one physical cpu or + * undo this override restoring the previous affinity. + * @arg == pointer to sched_pin_override structure. + * + * A negative pcpu value will undo a previous pin override and restore the + * previous cpu affinity. + * This call is allowed for the hardware domain only and requires the cpu + * to be part of the domain's cpupool. + */ +#define SCHEDOP_pin_override 7 + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); + +struct sched_poll { + GUEST_HANDLE(evtchn_port_t)ports; + unsigned int nr_ports; + u64 timeout; +}; + +DEFINE_GUEST_HANDLE_STRUCT(sched_poll); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown); + +struct sched_watchdog { + u32 id; /* watchdog ID */ + u32 timeout; /* timeout */ +}; + +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog); + +struct sched_pin_override { + s32 pcpu; +}; + +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ + +/* + * Domain asked to perform 'soft reset' for it. The expected behavior is to + * reset internal Xen state for the domain returning it to the point where it + * was created but leaving the domain's memory contents and vCPU contexts + * intact. This will allow the domain to start over and set up all Xen specific + * interfaces again. + */ +#define SHUTDOWN_soft_reset 5 +#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h new file mode 100644 index 00000000000..eec8ab75b9c --- /dev/null +++ b/include/xen/interface/xen.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: MIT + * + * xen.h + * + * Guest OS interface to Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include <xen/arm/interface.h> + +/* + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). + */ + +/* + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. + * EAX = return value + * (argument registers may be clobbered on return) + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. + * RAX = return value + * (argument registers not clobbered on return; RCX, R11 are) + */ +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +#ifndef __ASSEMBLY__ + +typedef u16 domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + u8 evtchn_upcall_pending; + u8 evtchn_upcall_mask; + xen_ulong_t evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; +}; /* 64 bytes (x86) */ + +/* + * Xen/kernel shared data -- pointer provided in start_info. + * NB. We expect that this struct is smaller than a page. + */ +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + struct pvclock_wall_clock wc; + + struct arch_shared_info arch; + +}; + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h new file mode 100644 index 00000000000..3ed7fd57333 --- /dev/null +++ b/include/xen/xenbus.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef XENBUS_H__ +#define XENBUS_H__ + +#include <xen/interface/xen.h> +#include <xen/interface/io/xenbus.h> + +typedef unsigned long xenbus_transaction_t; +#define XBT_NIL ((xenbus_transaction_t)0) + +extern u32 xenbus_evtchn; + +/* Initialize the XenBus system. */ +void init_xenbus(void); +/* Finalize the XenBus system. */ +void fini_xenbus(void); + +/** + * xenbus_read() - Read the value associated with a path. + * + * Returns a malloc'd error string on failure and sets *value to NULL. + * On success, *value is set to a malloc'd copy of the value. + */ +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); + +char *xenbus_wait_for_state_change(const char *path, XenbusState *state); +char *xenbus_switch_state(xenbus_transaction_t xbt, const char *path, + XenbusState state); + +/** + * xenbus_write() - Associates a value with a path. + * + * Returns a malloc'd error string on failure. + */ +char *xenbus_write(xenbus_transaction_t xbt, const char *path, + const char *value); + +/** + * xenbus_rm() - Removes the value associated with a path. + * + * Returns a malloc'd error string on failure. + */ +char *xenbus_rm(xenbus_transaction_t xbt, const char *path); + +/** + * xenbus_ls() - List the contents of a directory. + * + * Returns a malloc'd error string on failure and sets *contents to NULL. + * On success, *contents is set to a malloc'd array of pointers to malloc'd + * strings. The array is NULL terminated. May block. + */ +char *xenbus_ls(xenbus_transaction_t xbt, const char *prefix, char ***contents); + +/** + * xenbus_get_perms() - Reads permissions associated with a path. + * + * Returns a malloc'd error string on failure and sets *value to NULL. + * On success, *value is set to a malloc'd copy of the value. + */ +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value); + +/** + * xenbus_set_perms() - Sets the permissions associated with a path. + * + * Returns a malloc'd error string on failure. + */ +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, + char perm); + +/** + * xenbus_transaction_start() - Start a xenbus transaction. + * + * Returns the transaction in xbt on success or a malloc'd error string + * otherwise. + */ +char *xenbus_transaction_start(xenbus_transaction_t *xbt); + +/** + * xenbus_transaction_end() - End a xenbus transaction. + * + * Returns a malloc'd error string if it fails. Abort says whether the + * transaction should be aborted. + * Returns 1 in *retry if the transaction should be retried. + */ +char *xenbus_transaction_end(xenbus_transaction_t xbt, int abort, + int *retry); + +/** + * xenbus_read_integer() - Read path and parse it as an integer. + * + * Returns -1 on error. + */ +int xenbus_read_integer(const char *path); + +/** + * xenbus_read_uuid() - Read path and parse it as 16 byte uuid. + * + * Returns 1 if read and parsing were successful, 0 if not + */ +int xenbus_read_uuid(const char *path, unsigned char uuid[16]); + +/** + * xenbus_printf() - Contraction of snprintf and xenbus_write(path/node). + */ +char *xenbus_printf(xenbus_transaction_t xbt, + const char *node, const char *path, + const char *fmt, ...) + __attribute__((__format__(printf, 4, 5))); + +/** + * xenbus_get_self_id() - Utility function to figure out our domain id + */ +domid_t xenbus_get_self_id(void); + +#endif /* XENBUS_H__ */ diff --git a/lib/Kconfig b/lib/Kconfig index 089348af739..8efb154f734 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -80,6 +80,10 @@ config SPL_SPRINTF config TPL_SPRINTF bool +config SSCANF + bool + default n + config STRTO bool default y diff --git a/lib/Makefile b/lib/Makefile index 1dc06c57d5f..0cd7bea2823 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,7 @@ else # Main U-Boot always uses the full printf support obj-y += vsprintf.o strto.o obj-$(CONFIG_OID_REGISTRY) += oid_registry.o +obj-$(CONFIG_SSCANF) += sscanf.o endif obj-y += date.o diff --git a/lib/sscanf.c b/lib/sscanf.c new file mode 100644 index 00000000000..d1e2dc272cc --- /dev/null +++ b/lib/sscanf.c @@ -0,0 +1,823 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Author: Juergen Gross <jgross@suse.com> + * Date: Jun 2016 + */ + +#if !defined HAVE_LIBC + +#include <os.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <vsprintf.h> +#include <linux/string.h> +#include <malloc.h> +#define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) + +/** + * struct str_info - Input string parameters + * @neg: negative number or not + * 0 - not negative + * 1 - negative + * @any: set any if any `digits' consumed; make it negative to indicate + * overflow + * @acc: accumulated value + */ +struct str_info { + int neg, any; + u64 acc; +}; + +/** + * str_to_int_convert() - Write string data to structure + * @nptr: pointer to string + * @base: number's base + * @unsign: describes what integer is expected + * 0 - not unsigned + * 1 - unsigned + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + * + * Return: struct str_info *, which contains string data to future process + */ +static struct str_info * +str_to_int_convert(const char **nptr, int base, unsigned int unsign) +{ + const char *s = *nptr; + u64 acc; + unsigned char c; + u64 cutoff; + int neg, any, cutlim; + u64 qbase; + struct str_info *info; + + /* + * Skip white space and pick up leading +/- sign if any. + * If base is 0, allow 0x for hex and 0 for octal, else + * assume decimal; if base is already 16, allow 0x. + */ + info = (struct str_info *)malloc(sizeof(struct str_info)); + if (!info) + return NULL; + + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else { + neg = 0; + if (c == '+') + c = *s++; + } + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + + /* + * Compute the cutoff value between legal numbers and illegal + * numbers. That is the largest legal value, divided by the + * base. An input number that is greater than this value, if + * followed by a legal input character, is too big. One that + * is equal to this value may be valid or not; the limit + * between valid and invalid numbers is then based on the last + * digit. For instance, if the range for quads is + * [-9223372036854775808..9223372036854775807] and the input base + * is 10, cutoff will be set to 922337203685477580 and cutlim to + * either 7 (neg==0) or 8 (neg==1), meaning that if we have + * accumulated a value > 922337203685477580, or equal but the + * next digit is > 7 (or 8), the number is too big, and we will + * return a range error. + * + * Set any if any `digits' consumed; make it negative to indicate + * overflow. + */ + qbase = (unsigned int)base; + + if (!unsign) { + cutoff = neg ? (u64)-(LLONG_MIN + LLONG_MAX) + LLONG_MAX : LLONG_MAX; + cutlim = cutoff % qbase; + cutoff /= qbase; + } else { + cutoff = (u64)ULLONG_MAX / qbase; + cutlim = (u64)ULLONG_MAX % qbase; + } + + for (acc = 0, any = 0;; c = *s++) { + if (!isascii(c)) + break; + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) { + any = -1; + } else { + any = 1; + acc *= qbase; + acc += c; + } + } + + info->any = any; + info->neg = neg; + info->acc = acc; + + *nptr = s; + + return info; +} + +/** + * strtoq() - Convert a string to a quad integer + * @nptr: pointer to string + * @endptr: pointer to number's end in the string + * @base: number's base + * + * Return: s64 quad integer number converted from input string + */ +static s64 +strtoq(const char *nptr, char **endptr, int base) +{ + const char *s = nptr; + u64 acc; + int unsign = 0; + struct str_info *info; + + info = str_to_int_convert(&s, base, unsign); + if (!info) + return -1; + + acc = info->acc; + + if (info->any < 0) + acc = info->neg ? LLONG_MIN : LLONG_MAX; + else if (info->neg) + acc = -acc; + if (endptr != 0) + *endptr = __DECONST(char *, info->any ? s - 1 : nptr); + + free(info); + + return acc; +} + +/** + * strtouq() - Convert a string to an unsigned quad integer + * @nptr: pointer to string + * @endptr: pointer to number's end in the string + * @base: number's base + * + * Return: s64 unsigned quad integer number converted from + * input string + */ +u64 +strtouq(const char *nptr, char **endptr, int base) +{ + const char *s = nptr; + u64 acc; + int unsign = 1; + struct str_info *info; + + info = str_to_int_convert(&s, base, unsign); + if (!info) + return -1; + + acc = info->acc; + + if (info->any < 0) + acc = ULLONG_MAX; + else if (info->neg) + acc = -acc; + if (endptr != 0) + *endptr = __DECONST(char *, info->any ? s - 1 : nptr); + + free(info); + + return acc; +} + +/** + * __sccl() - Fill in the given table from the scanset at the given format + * (just after `[') + * @tab: table to fill in + * @fmt: format of buffer + * + * The table has a 1 wherever characters should be considered part of the + * scanset. + * + * Return: pointer to the character past the closing `]' + */ +static const u_char * +__sccl(char *tab, const u_char *fmt) +{ + int c, n, v; + + /* first `clear' the whole table */ + c = *fmt++; /* first char hat => negated scanset */ + if (c == '^') { + v = 1; /* default => accept */ + c = *fmt++; /* get new first char */ + } else { + v = 0; /* default => reject */ + } + + /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ + for (n = 0; n < 256; n++) + tab[n] = v; /* memset(tab, v, 256) */ + + if (c == 0) + return (fmt - 1);/* format ended before closing ] */ + + /* + * Now set the entries corresponding to the actual scanset + * to the opposite of the above. + * + * The first character may be ']' (or '-') without being special; + * the last character may be '-'. + */ + v = 1 - v; + for (;;) { + tab[c] = v; /* take character c */ +doswitch: + n = *fmt++; /* and examine the next */ + switch (n) { + case 0: /* format ended too soon */ + return (fmt - 1); + + case '-': + /* + * A scanset of the form + * [01+-] + * is defined as `the digit 0, the digit 1, + * the character +, the character -', but + * the effect of a scanset such as + * [a-zA-Z0-9] + * is implementation defined. The V7 Unix + * scanf treats `a-z' as `the letters a through + * z', but treats `a-a' as `the letter a, the + * character -, and the letter a'. + * + * For compatibility, the `-' is not considerd + * to define a range if the character following + * it is either a close bracket (required by ANSI) + * or is not numerically greater than the character + * we just stored in the table (c). + */ + n = *fmt; + if (n == ']' || n < c) { + c = '-'; + break; /* resume the for(;;) */ + } + fmt++; + /* fill in the range */ + do { + tab[++c] = v; + } while (c < n); + c = n; + /* + * Alas, the V7 Unix scanf also treats formats + * such as [a-c-e] as `the letters a through e'. + * This too is permitted by the standard.... + */ + goto doswitch; + break; + + case ']': /* end of scanset */ + return (fmt); + + default: /* just another character */ + c = n; + break; + } + } + /* NOTREACHED */ +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +#define BUF 32 /* Maximum length of numeric string. */ + +/* + * Flags used during conversion. + */ +#define LONG 0x01 /* l: long or double */ +#define SHORT 0x04 /* h: short */ +#define SUPPRESS 0x08 /* suppress assignment */ +#define POINTER 0x10 /* weird %p pointer (`fake hex') */ +#define NOSKIP 0x20 /* do not skip blanks */ +#define QUAD 0x400 +#define SHORTSHORT 0x4000 /** hh: char */ + +/* + * The following are used in numeric conversions only: + * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; + * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. + */ +#define SIGNOK 0x40 /* +/- is (still) legal */ +#define NDIGITS 0x80 /* no digits detected */ + +#define DPTOK 0x100 /* (float) decimal point is still legal */ +#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ + +#define PFXOK 0x100 /* 0x prefix is (still) legal */ +#define NZDIGITS 0x200 /* no zero digits detected */ + +/* + * Conversion types. + */ +#define CT_CHAR 0 /* %c conversion */ +#define CT_CCL 1 /* %[...] conversion */ +#define CT_STRING 2 /* %s conversion */ +#define CT_INT 3 /* integer, i.e., strtoq or strtouq */ +typedef u64 (*ccfntype)(const char *, char **, int); + +int +vsscanf(const char *inp, char const *fmt0, va_list ap) +{ + int inr; + const u_char *fmt = (const u_char *)fmt0; + int c; /* character from format, or conversion */ + size_t width; /* field width, or 0 */ + char *p; /* points into all kinds of strings */ + int n; /* handy integer */ + int flags; /* flags as defined above */ + char *p0; /* saves original value of p when necessary */ + int nassigned; /* number of fields assigned */ + int nconversions; /* number of conversions */ + int nread; /* number of characters consumed from fp */ + int base; /* base argument to strtoq/strtouq */ + ccfntype ccfn; /* conversion function (strtoq/strtouq) */ + char ccltab[256]; /* character class table for %[...] */ + char buf[BUF]; /* buffer for numeric conversions */ + + /* `basefix' is used to avoid `if' tests in the integer scanner */ + static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16 }; + + inr = strlen(inp); + + nassigned = 0; + nconversions = 0; + nread = 0; + base = 0; /* XXX just to keep gcc happy */ + ccfn = NULL; /* XXX just to keep gcc happy */ + for (;;) { + c = *fmt++; + if (c == 0) + return (nassigned); + if (isspace(c)) { + while (inr > 0 && isspace(*inp)) + nread++, inr--, inp++; + continue; + } + if (c != '%') + goto literal; + width = 0; + flags = 0; + /* + * switch on the format. continue if done; + * break once format type is derived. + */ +again: c = *fmt++; + switch (c) { + case '%': +literal: + if (inr <= 0) + goto input_failure; + if (*inp != c) + goto match_failure; + inr--, inp++; + nread++; + continue; + + case '*': + flags |= SUPPRESS; + goto again; + case 'l': + if (flags & LONG) { + flags &= ~LONG; + flags |= QUAD; + } else { + flags |= LONG; + } + goto again; + case 'q': + flags |= QUAD; + goto again; + case 'h': + if (flags & SHORT) { + flags &= ~SHORT; + flags |= SHORTSHORT; + } else { + flags |= SHORT; + } + goto again; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + width = width * 10 + c - '0'; + goto again; + + /* + * Conversions. + * + */ + case 'd': + c = CT_INT; + ccfn = (ccfntype)strtoq; + base = 10; + break; + + case 'i': + c = CT_INT; + ccfn = (ccfntype)strtoq; + base = 0; + break; + + case 'o': + c = CT_INT; + ccfn = strtouq; + base = 8; + break; + + case 'u': + c = CT_INT; + ccfn = strtouq; + base = 10; + break; + + case 'x': + flags |= PFXOK; /* enable 0x prefixing */ + c = CT_INT; + ccfn = strtouq; + base = 16; + break; + + case 's': + c = CT_STRING; + break; + + case '[': + fmt = __sccl(ccltab, fmt); + flags |= NOSKIP; + c = CT_CCL; + break; + + case 'c': + flags |= NOSKIP; + c = CT_CHAR; + break; + + case 'p': /* pointer format is like hex */ + flags |= POINTER | PFXOK; + c = CT_INT; + ccfn = strtouq; + base = 16; + break; + + case 'n': + nconversions++; + if (flags & SUPPRESS) /* ??? */ + continue; + if (flags & SHORTSHORT) + *va_arg(ap, char *) = nread; + else if (flags & SHORT) + *va_arg(ap, short *) = nread; + else if (flags & LONG) + *va_arg(ap, long *) = nread; + else if (flags & QUAD) + *va_arg(ap, s64 *) = nread; + else + *va_arg(ap, int *) = nread; + continue; + } + + /* + * We have a conversion that requires input. + */ + if (inr <= 0) + goto input_failure; + + /* + * Consume leading white space, except for formats + * that suppress this. + */ + if ((flags & NOSKIP) == 0) { + while (isspace(*inp)) { + nread++; + if (--inr > 0) + inp++; + else + goto input_failure; + } + /* + * Note that there is at least one character in + * the buffer, so conversions that do not set NOSKIP + * can no longer result in an input failure. + */ + } + + /* + * Do the conversion. + */ + switch (c) { + case CT_CHAR: + /* scan arbitrary characters (sets NOSKIP) */ + if (width == 0) + width = 1; + if (flags & SUPPRESS) { + size_t sum = 0; + + if ((n = inr) < width) { + sum += n; + width -= n; + inp += n; + if (sum == 0) + goto input_failure; + } else { + sum += width; + inr -= width; + inp += width; + } + nread += sum; + } else { + memcpy(va_arg(ap, char *), inp, width); + inr -= width; + inp += width; + nread += width; + nassigned++; + } + nconversions++; + break; + + case CT_CCL: + /* scan a (nonempty) character class (sets NOSKIP) */ + if (width == 0) + width = (size_t)~0; /* `infinity' */ + /* take only those things in the class */ + if (flags & SUPPRESS) { + n = 0; + while (ccltab[(unsigned char)*inp]) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (n == 0) + goto input_failure; + break; + } + } + if (n == 0) + goto match_failure; + } else { + p = va_arg(ap, char *); + p0 = p; + while (ccltab[(unsigned char)*inp]) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (p == p0) + goto input_failure; + break; + } + } + n = p - p0; + if (n == 0) + goto match_failure; + *p = 0; + nassigned++; + } + nread += n; + nconversions++; + break; + + case CT_STRING: + /* like CCL, but zero-length string OK, & no NOSKIP */ + if (width == 0) + width = (size_t)~0; + if (flags & SUPPRESS) { + n = 0; + while (!isspace(*inp)) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + nread += n; + } else { + p = va_arg(ap, char *); + p0 = p; + while (!isspace(*inp)) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + *p = 0; + nread += p - p0; + nassigned++; + } + nconversions++; + continue; + + case CT_INT: + /* scan an integer as if by strtoq/strtouq */ +#ifdef hardway + if (width == 0 || width > sizeof(buf) - 1) + width = sizeof(buf) - 1; +#else + /* size_t is unsigned, hence this optimisation */ + if (--width > sizeof(buf) - 2) + width = sizeof(buf) - 2; + width++; +#endif + flags |= SIGNOK | NDIGITS | NZDIGITS; + for (p = buf; width; width--) { + c = *inp; + /* + * Switch on the character; `goto ok' + * if we accept it as a part of number. + */ + switch (c) { + /* + * The digit 0 is always legal, but is + * special. For %i conversions, if no + * digits (zero or nonzero) have been + * scanned (only signs), we will have + * base==0. In that case, we should set + * it to 8 and enable 0x prefixing. + * Also, if we have not scanned zero digits + * before this, do not turn off prefixing + * (someone else will turn it off if we + * have scanned any nonzero digits). + */ + case '0': + if (base == 0) { + base = 8; + flags |= PFXOK; + } + if (flags & NZDIGITS) + flags &= ~(SIGNOK | NZDIGITS | NDIGITS); + else + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* 1 through 7 always legal */ + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + base = basefix[base]; + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* digits 8 and 9 ok iff decimal or hex */ + case '8': case '9': + base = basefix[base]; + if (base <= 8) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* letters ok iff hex */ + case 'A': case 'B': case 'C': + case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': + case 'd': case 'e': case 'f': + /* no need to fix base here */ + if (base <= 10) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* sign ok only as first character */ + case '+': case '-': + if (flags & SIGNOK) { + flags &= ~SIGNOK; + goto ok; + } + break; + + /* x ok iff flag still set & 2nd char */ + case 'x': case 'X': + if (flags & PFXOK && p == buf + 1) { + base = 16; /* if %i */ + flags &= ~PFXOK; + goto ok; + } + break; + } + + /* + * If we got here, c is not a legal character + * for a number. Stop accumulating digits. + */ + break; +ok: + /* + * c is legal: store it and look at the next. + */ + *p++ = c; + if (--inr > 0) + inp++; + else + break; /* end of input */ + } + /* + * If we had only a sign, it is no good; push + * back the sign. If the number ends in `x', + * it was [sign] '' 'x', so push back the x + * and treat it as [sign] ''. + */ + if (flags & NDIGITS) { + if (p > buf) { + inp--; + inr++; + } + goto match_failure; + } + c = ((u_char *)p)[-1]; + if (c == 'x' || c == 'X') { + --p; + inp--; + inr++; + } + if ((flags & SUPPRESS) == 0) { + u64 res; + + *p = 0; + res = (*ccfn)(buf, (char **)NULL, base); + if (flags & POINTER) + *va_arg(ap, void **) = + (void *)(uintptr_t)res; + else if (flags & SHORTSHORT) + *va_arg(ap, char *) = res; + else if (flags & SHORT) + *va_arg(ap, short *) = res; + else if (flags & LONG) + *va_arg(ap, long *) = res; + else if (flags & QUAD) + *va_arg(ap, s64 *) = res; + else + *va_arg(ap, int *) = res; + nassigned++; + } + nread += p - buf; + nconversions++; + break; + } + } +input_failure: + return (nconversions != 0 ? nassigned : -1); +match_failure: + return (nassigned); +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsscanf(buf, fmt, args); + va_end(args); + return i; +} + +#endif diff --git a/test/lib/Makefile b/test/lib/Makefile index 6ccc2c41bc3..b6a0a208c5e 100644 --- a/test/lib/Makefile +++ b/test/lib/Makefile @@ -6,6 +6,7 @@ obj-y += cmd_ut_lib.o obj-$(CONFIG_EFI_SECURE_BOOT) += efi_image_region.o obj-y += hexdump.o obj-y += lmb.o +obj-y += sscanf.o obj-y += string.o obj-$(CONFIG_ERRNO_STR) += test_errno_str.o obj-$(CONFIG_UT_LIB_ASN1) += asn1.o diff --git a/test/lib/sscanf.c b/test/lib/sscanf.c new file mode 100644 index 00000000000..772e4b92042 --- /dev/null +++ b/test/lib/sscanf.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2002, Uwe Bonnes + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2016, The Tor Project, Inc. + * Copyright (c) 2020, EPAM Systems Inc. + * + * Unit tests for sscanf() function + */ + +#include <common.h> +#include <command.h> +#include <log.h> +#include <test/lib.h> +#include <test/test.h> +#include <test/ut.h> + +#define EOF -1 + +/** + * lib_sscanf() - unit test for sscanf() + * @uts: unit test state + * + * Test sscanf() with varied parameters in different combinations passed + * as arguments. + * + * Return: 0 - success + * 1 - failure + */ +static int lib_sscanf(struct unit_test_state *uts) +{ + char buffer[100], buffer1[100]; + int result, ret; + static const char pname[] = " Hello World!\n"; + int hour = 21, min = 59, sec = 20; + int number, number_so_far; + unsigned int u1, u2, u3; + char s1[20], s2[10], s3[10], ch; + int r, int1, int2; + long lng1; + + /* check EOF */ + strcpy(buffer, ""); + ret = sscanf(buffer, "%d", &result); + ut_asserteq(ret, EOF); + + /* check %x */ + strcpy(buffer, "0x519"); + ut_asserteq(sscanf(buffer, "%x", &result), 1); + ut_asserteq(result, 0x519); + + strcpy(buffer, "0x51a"); + ut_asserteq(sscanf(buffer, "%x", &result), 1); + ut_asserteq(result, 0x51a); + + strcpy(buffer, "0x51g"); + ut_asserteq(sscanf(buffer, "%x", &result), 1); + ut_asserteq(result, 0x51); + + /* check strings */ + ret = sprintf(buffer, " %s", pname); + ret = sscanf(buffer, "%*c%[^\n]", buffer1); + ut_asserteq(ret, 1); + ut_asserteq(strncmp(pname, buffer1, strlen(buffer1)), 0); + + /* check digits */ + ret = sprintf(buffer, "%d:%d:%d", hour, min, sec); + ret = sscanf(buffer, "%d%n", &number, &number_so_far); + ut_asserteq(ret, 1); + ut_asserteq(number, hour); + ut_asserteq(number_so_far, 2); + + ret = sscanf(buffer + 2, "%*c%n", &number_so_far); + ut_asserteq(ret, 0); + ut_asserteq(number_so_far, 1); + + /* Check %i */ + strcpy(buffer, "123"); + ret = sscanf(buffer, "%i", &result); + ut_asserteq(ret, 1); + ut_asserteq(result, 123); + ret = sscanf(buffer, "%d", &result); + ut_asserteq(ret, 1); + ut_asserteq(result, 123); + + ut_asserteq(0, sscanf("hello world", "hello world")); + ut_asserteq(0, sscanf("hello world", "good bye")); + /* Excess data */ + ut_asserteq(0, sscanf("hello 3", "%u", &u1)); /* have to match the start */ + ut_asserteq(1, sscanf("3 hello", "%u", &u1)); /* but trailing is alright */ + + /* Numbers (ie. %u) */ + ut_asserteq(0, sscanf("hello world 3", "hello worlb %u", &u1)); /* d vs b */ + ut_asserteq(1, sscanf("12345", "%u", &u1)); + ut_asserteq(12345u, u1); + ut_asserteq(1, sscanf("0", "%u", &u1)); + ut_asserteq(0u, u1); + ut_asserteq(1, sscanf("0000", "%u", &u2)); + ut_asserteq(0u, u2); + ut_asserteq(0, sscanf("A", "%u", &u1)); /* bogus number */ + + /* Numbers with size (eg. %2u) */ + ut_asserteq(2, sscanf("123456", "%2u%u", &u1, &u2)); + ut_asserteq(12u, u1); + ut_asserteq(3456u, u2); + ut_asserteq(1, sscanf("123456", "%8u", &u1)); + ut_asserteq(123456u, u1); + ut_asserteq(1, sscanf("123457 ", "%8u", &u1)); + ut_asserteq(123457u, u1); + ut_asserteq(3, sscanf("!12:3:456", "!%2u:%2u:%3u", &u1, &u2, &u3)); + ut_asserteq(12u, u1); + ut_asserteq(3u, u2); + ut_asserteq(456u, u3); + ut_asserteq(3, sscanf("67:8:099", "%2u:%2u:%3u", &u1, &u2, &u3)); /* 0s */ + ut_asserteq(67u, u1); + ut_asserteq(8u, u2); + ut_asserteq(99u, u3); + /* Arbitrary amounts of 0-padding are okay */ + ut_asserteq(3, sscanf("12:03:000000000000000099", "%2u:%2u:%u", &u1, &u2, &u3)); + ut_asserteq(12u, u1); + ut_asserteq(3u, u2); + ut_asserteq(99u, u3); + + /* Hex (ie. %x) */ + ut_asserteq(3, sscanf("1234 02aBcdEf ff", "%x %x %x", &u1, &u2, &u3)); + ut_asserteq(0x1234, u1); + ut_asserteq(0x2ABCDEF, u2); + ut_asserteq(0xFF, u3); + /* Width works on %x */ + ut_asserteq(3, sscanf("f00dcafe444", "%4x%4x%u", &u1, &u2, &u3)); + ut_asserteq(0xf00d, u1); + ut_asserteq(0xcafe, u2); + ut_asserteq(444, u3); + + /* Literal '%' (ie. '%%') */ + ut_asserteq(1, sscanf("99% fresh", "%3u%% fresh", &u1)); + ut_asserteq(99, u1); + ut_asserteq(0, sscanf("99 fresh", "%% %3u %s", &u1, s1)); + ut_asserteq(1, sscanf("99 fresh", "%3u%% %s", &u1, s1)); + ut_asserteq(2, sscanf("99 fresh", "%3u %5s %%", &u1, s1)); + ut_asserteq(99, u1); + ut_asserteq_str(s1, "fresh"); + ut_asserteq(1, sscanf("% boo", "%% %3s", s1)); + ut_asserteq_str("boo", s1); + + /* Strings (ie. %s) */ + ut_asserteq(2, sscanf("hello", "%3s%7s", s1, s2)); + ut_asserteq_str(s1, "hel"); + ut_asserteq_str(s2, "lo"); + ut_asserteq(2, sscanf("WD40", "%2s%u", s3, &u1)); /* %s%u */ + ut_asserteq_str(s3, "WD"); + ut_asserteq(40, u1); + ut_asserteq(2, sscanf("WD40", "%3s%u", s3, &u1)); /* %s%u */ + ut_asserteq_str(s3, "WD4"); + ut_asserteq(0, u1); + ut_asserteq(2, sscanf("76trombones", "%6u%9s", &u1, s1)); /* %u%s */ + ut_asserteq(76, u1); + ut_asserteq_str(s1, "trombones"); + + ut_asserteq(3, sscanf("1.2.3", "%u.%u.%u%c", &u1, &u2, &u3, &ch)); + ut_asserteq(4, sscanf("1.2.3 foobar", "%u.%u.%u%c", &u1, &u2, &u3, &ch)); + ut_asserteq(' ', ch); + + r = sscanf("12345 -67890 -1", "%d %ld %d", &int1, &lng1, &int2); + ut_asserteq(r, 3); + ut_asserteq(int1, 12345); + ut_asserteq(lng1, -67890); + ut_asserteq(int2, -1); + + return 0; +} + +LIB_TEST(lib_sscanf, 0); |