-rw-r--r--  Kconfig | 18
-rw-r--r--  Licenses/README | 1
-rw-r--r--  Licenses/mit.txt | 20
-rw-r--r--  MAINTAINERS | 12
-rw-r--r--  arch/arm/Kconfig | 9
-rw-r--r--  arch/arm/cpu/armv8/Makefile | 1
-rw-r--r--  arch/arm/cpu/armv8/xen/Makefile | 6
-rw-r--r--  arch/arm/cpu/armv8/xen/hypercall.S | 79
-rw-r--r--  arch/arm/cpu/armv8/xen/lowlevel_init.S | 33
-rw-r--r--  arch/arm/include/asm/arch-aspeed/platform.h | 20
-rw-r--r--  arch/arm/include/asm/io.h | 4
-rw-r--r--  arch/arm/include/asm/xen.h | 7
-rw-r--r--  arch/arm/include/asm/xen/hypercall.h | 22
-rw-r--r--  arch/arm/include/asm/xen/system.h | 88
-rw-r--r--  arch/arm/mach-aspeed/Makefile | 2
-rw-r--r--  arch/arm/mach-aspeed/ast2500/Makefile | 2
-rw-r--r--  arch/arm/mach-aspeed/ast2500/board_common.c (renamed from arch/arm/mach-aspeed/ast2500-board.c) | 25
-rw-r--r--  arch/arm/mach-aspeed/ast2500/lowlevel_init.S | 41
-rw-r--r--  board/xen/xenguest_arm64/Kconfig | 12
-rw-r--r--  board/xen/xenguest_arm64/MAINTAINERS | 7
-rw-r--r--  board/xen/xenguest_arm64/Makefile | 5
-rw-r--r--  board/xen/xenguest_arm64/xenguest_arm64.c | 202
-rw-r--r--  cmd/Kconfig | 7
-rw-r--r--  cmd/Makefile | 1
-rw-r--r--  cmd/demo.c | 1
-rw-r--r--  cmd/pvblock.c | 30
-rw-r--r--  common/board_r.c | 27
-rw-r--r--  configs/evb-ast2500_defconfig | 3
-rw-r--r--  configs/xenguest_arm64_defconfig | 60
-rw-r--r--  disk/part.c | 4
-rw-r--r--  doc/board/index.rst | 1
-rw-r--r--  doc/board/xen/index.rst | 9
-rw-r--r--  doc/board/xen/xenguest_arm64.rst | 81
-rw-r--r--  drivers/Kconfig | 2
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/block/blk-uclass.c | 2
-rw-r--r--  drivers/pci/Kconfig | 7
-rw-r--r--  drivers/pci/Makefile | 1
-rw-r--r--  drivers/pci/pci-uclass.c | 42
-rw-r--r--  drivers/pci/pcie_iproc.c | 1287
-rw-r--r--  drivers/serial/Kconfig | 7
-rw-r--r--  drivers/serial/Makefile | 1
-rw-r--r--  drivers/serial/serial_xen.c | 182
-rw-r--r--  drivers/usb/gadget/ether.c | 1
-rw-r--r--  drivers/usb/musb-new/linux-compat.h | 4
-rw-r--r--  drivers/xen/Kconfig | 10
-rw-r--r--  drivers/xen/Makefile | 10
-rw-r--r--  drivers/xen/events.c | 199
-rw-r--r--  drivers/xen/gnttab.c | 216
-rw-r--r--  drivers/xen/hypervisor.c | 252
-rw-r--r--  drivers/xen/pvblock.c | 867
-rw-r--r--  drivers/xen/xenbus.c | 557
-rw-r--r--  include/blk.h | 1
-rw-r--r--  include/configs/aspeed-common.h | 18
-rw-r--r--  include/configs/xenguest_arm64.h | 52
-rw-r--r--  include/dm/uclass-id.h | 1
-rw-r--r--  include/linux/compat.h | 54
-rw-r--r--  include/pci.h | 3
-rw-r--r--  include/pvblock.h | 17
-rw-r--r--  include/vsprintf.h | 8
-rw-r--r--  include/xen.h | 24
-rw-r--r--  include/xen/arm/interface.h | 88
-rw-r--r--  include/xen/events.h | 42
-rw-r--r--  include/xen/gnttab.h | 24
-rw-r--r--  include/xen/hvm.h | 27
-rw-r--r--  include/xen/interface/event_channel.h | 279
-rw-r--r--  include/xen/interface/grant_table.h | 565
-rw-r--r--  include/xen/interface/hvm/hvm_op.h | 55
-rw-r--r--  include/xen/interface/hvm/params.h | 116
-rw-r--r--  include/xen/interface/io/blkif.h | 701
-rw-r--r--  include/xen/interface/io/console.h | 39
-rw-r--r--  include/xen/interface/io/protocols.h | 28
-rw-r--r--  include/xen/interface/io/ring.h | 462
-rw-r--r--  include/xen/interface/io/xenbus.h | 64
-rw-r--r--  include/xen/interface/io/xs_wire.h | 134
-rw-r--r--  include/xen/interface/memory.h | 332
-rw-r--r--  include/xen/interface/sched.h | 171
-rw-r--r--  include/xen/interface/xen.h | 208
-rw-r--r--  include/xen/xenbus.h | 116
-rw-r--r--  lib/Kconfig | 4
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/sscanf.c | 823
-rw-r--r--  test/lib/Makefile | 1
-rw-r--r--  test/lib/sscanf.c | 174
84 files changed, 9077 insertions, 43 deletions
diff --git a/Kconfig b/Kconfig
index 372425ed529..883e3f71d01 100644
--- a/Kconfig
+++ b/Kconfig
@@ -105,6 +105,24 @@ config CC_COVERAGE
config CC_HAS_ASM_INLINE
def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+config XEN
+ bool "Select U-Boot be run as a bootloader for XEN Virtual Machine"
+ help
+ Enabling this option makes U-Boot run as a bootloader
+ for a Xen [1] Virtual Machine.
+
+ Xen is a virtual machine monitor (VMM) or a type-1 hypervisor with support
+ for para-virtualization. Xen can organize the safe execution of several
+ virtual machines on the same physical system with performance close to
+ native. It is used as the basis for a number of different commercial and
+ open source applications, such as: server virtualization, Infrastructure
+ as a Service (IaaS), desktop virtualization, security applications,
+ embedded and hardware appliances.
+ Xen has a special VM called Domain-0 that runs the Dom0 kernel and allows
+ Xen to use the Domain-0 kernel's device drivers by default.
+
+ [1] - https://xenproject.org/
+
config DISTRO_DEFAULTS
bool "Select defaults suitable for booting general purpose Linux distributions"
select AUTO_COMPLETE
diff --git a/Licenses/README b/Licenses/README
index 486e18d0d80..c23ad216fc8 100644
--- a/Licenses/README
+++ b/Licenses/README
@@ -149,5 +149,6 @@ BSD 3-clause "New" or "Revised" License BSD-3-Clause Y bsd-3-clause.txt http:/
IBM PIBS (PowerPC Initialization and IBM-pibs ibm-pibs.txt
Boot Software) license
ISC License ISC Y isc.txt https://spdx.org/licenses/ISC
+MIT License MIT Y mit.txt https://spdx.org/licenses/MIT.html
SIL OPEN FONT LICENSE (OFL-1.1) OFL-1.1 Y OFL.txt https://spdx.org/licenses/OFL-1.1.html
X11 License X11 x11.txt https://spdx.org/licenses/X11.html
diff --git a/Licenses/mit.txt b/Licenses/mit.txt
new file mode 100644
index 00000000000..25a55e054f7
--- /dev/null
+++ b/Licenses/mit.txt
@@ -0,0 +1,20 @@
+MIT License
+Copyright (c) 2020 EPAM Systems Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MAINTAINERS b/MAINTAINERS
index e35d5d4fcbf..17ac45587b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -148,6 +148,18 @@ F: include/configs/meson64_android.h
F: doc/board/amlogic/
N: meson
+ARM ASPEED
+M: Ryan Chen <ryan_chen@aspeedtech.com>
+M: Chia-Wei Wang <chiawei_wang@aspeedtech.com>
+R: Aspeed BMC SW team <BMC-SW@aspeedtech.com>
+S: Maintained
+F: arch/arm/mach-aspeed/
+F: arch/arm/include/asm/arch-aspeed/
+F: board/aspeed/
+F: drivers/clk/aspeed/
+F: drivers/pinctrl/aspeed/
+N: aspeed
+
ARM BROADCOM BCM283X
M: Matthias Brugger <mbrugger@suse.com>
S: Maintained
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6b8a32c38d9..84018516668 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1749,6 +1749,14 @@ config TARGET_PRESIDIO_ASIC
bool "Support Cortina Presidio ASIC Platform"
select ARM64
+config TARGET_XENGUEST_ARM64
+ bool "Xen guest ARM64"
+ select ARM64
+ select XEN
+ select OF_CONTROL
+ select LINUX_KERNEL_IMAGE_HEADER
+ select XEN_SERIAL
+ select SSCANF
endchoice
config ARCH_SUPPORT_TFABOOT
@@ -1955,6 +1963,7 @@ source "board/xilinx/Kconfig"
source "board/xilinx/zynq/Kconfig"
source "board/xilinx/zynqmp/Kconfig"
source "board/phytium/durian/Kconfig"
+source "board/xen/xenguest_arm64/Kconfig"
source "arch/arm/Kconfig.debug"
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
index 7e33a183d54..93d26f98568 100644
--- a/arch/arm/cpu/armv8/Makefile
+++ b/arch/arm/cpu/armv8/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_TARGET_HIKEY) += hisilicon/
obj-$(CONFIG_ARMV8_PSCI) += psci.o
obj-$(CONFIG_ARCH_SUNXI) += lowlevel_init.o
obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
+obj-$(CONFIG_XEN) += xen/
diff --git a/arch/arm/cpu/armv8/xen/Makefile b/arch/arm/cpu/armv8/xen/Makefile
new file mode 100644
index 00000000000..e3b4ae2bd40
--- /dev/null
+++ b/arch/arm/cpu/armv8/xen/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# (C) 2018 NXP
+# (C) 2020 EPAM Systems Inc.
+
+obj-y += lowlevel_init.o hypercall.o
diff --git a/arch/arm/cpu/armv8/xen/hypercall.S b/arch/arm/cpu/armv8/xen/hypercall.S
new file mode 100644
index 00000000000..731256b34e2
--- /dev/null
+++ b/arch/arm/cpu/armv8/xen/hypercall.S
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * hypercall.S
+ *
+ * Xen hypercall wrappers
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * The Xen hypercall calling convention is very similar to the procedure
+ * call standard for the ARM 64-bit architecture: the first parameter is
+ * passed in x0, the second in x1, the third in x2, the fourth in x3 and
+ * the fifth in x4.
+ *
+ * The hypercall number is passed in x16.
+ *
+ * The return value is in x0.
+ *
+ * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM
+ * hypercall tag.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the ARM 64-bit EABI standard.
+ */
+
+#include <xen/interface/xen.h>
+
+#define XEN_HYPERCALL_TAG 0xEA1
+
+#define HYPERCALL_SIMPLE(hypercall) \
+.globl HYPERVISOR_##hypercall; \
+.align 4,0x90; \
+HYPERVISOR_##hypercall: \
+ mov x16, #__HYPERVISOR_##hypercall; \
+ hvc XEN_HYPERCALL_TAG; \
+ ret; \
+
+#define HYPERCALL0 HYPERCALL_SIMPLE
+#define HYPERCALL1 HYPERCALL_SIMPLE
+#define HYPERCALL2 HYPERCALL_SIMPLE
+#define HYPERCALL3 HYPERCALL_SIMPLE
+#define HYPERCALL4 HYPERCALL_SIMPLE
+#define HYPERCALL5 HYPERCALL_SIMPLE
+
+ .text
+
+HYPERCALL2(xen_version);
+HYPERCALL3(console_io);
+HYPERCALL3(grant_table_op);
+HYPERCALL2(sched_op);
+HYPERCALL2(event_channel_op);
+HYPERCALL2(hvm_op);
+HYPERCALL2(memory_op);
+
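To illustrate the calling convention described in the comment above: a C caller only needs the prototypes from asm/xen/hypercall.h (added by this series), and the wrappers place the arguments in x0..x4 and the hypercall number in x16. A minimal, hypothetical sketch using constants from the standard Xen interface headers imported by this patch:

#include <common.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <asm/xen/hypercall.h>

/* Write a message to the hypervisor console, then yield the vCPU. */
static void xen_hello(void)
{
	char msg[] = "hello from the guest\n";

	/* x16 = __HYPERVISOR_console_io, x0..x2 = cmd, count, str */
	HYPERVISOR_console_io(CONSOLEIO_write, sizeof(msg) - 1, msg);

	/* x16 = __HYPERVISOR_sched_op, x0 = SCHEDOP_yield, x1 = NULL */
	HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
}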
diff --git a/arch/arm/cpu/armv8/xen/lowlevel_init.S b/arch/arm/cpu/armv8/xen/lowlevel_init.S
new file mode 100644
index 00000000000..760e32ed761
--- /dev/null
+++ b/arch/arm/cpu/armv8/xen/lowlevel_init.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * (C) 2017 NXP
+ * (C) 2020 EPAM Systems Inc.
+ */
+
+#include <config.h>
+
+.align 8
+.global rom_pointer
+rom_pointer:
+ .space 32
+
+/*
+ * Routine: save_boot_params (called after reset from start.S)
+ */
+
+.global save_boot_params
+save_boot_params:
+ /* The firmware provided ATAG/FDT address can be found in r2/x0 */
+ adr x1, rom_pointer
+ stp x0, x2, [x1], #16
+ stp x3, x4, [x1], #16
+
+ /* Returns */
+ b save_boot_params_ret
+
+.global restore_boot_params
+restore_boot_params:
+ adr x1, rom_pointer
+ ldp x0, x2, [x1], #16
+ ldp x3, x4, [x1], #16
+ ret
diff --git a/arch/arm/include/asm/arch-aspeed/platform.h b/arch/arm/include/asm/arch-aspeed/platform.h
new file mode 100644
index 00000000000..6cee036f54c
--- /dev/null
+++ b/arch/arm/include/asm/arch-aspeed/platform.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) ASPEED Technology Inc.
+ * Ryan Chen <ryan_chen@aspeedtech.com>
+ *
+ */
+
+#ifndef _ASM_ARCH_PLATFORM_H
+#define _ASM_ARCH_PLATFORM_H
+
+#if defined(CONFIG_ASPEED_AST2500)
+#define ASPEED_MAC_COUNT 2
+#define ASPEED_DRAM_BASE 0x80000000
+#define ASPEED_SRAM_BASE 0x1e720000
+#define ASPEED_SRAM_SIZE 0x9000
+#else
+#err "Unrecognized Aspeed platform."
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 8959749ad65..ade1401f3b4 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -110,9 +110,13 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
* have some advantages to use them instead of the simple one here.
*/
#define mb() dsb()
+#define rmb() dsb()
+#define wmb() dsb()
#define __iormb() dmb()
#define __iowmb() dmb()
+#define smp_processor_id() 0
+
#define writeb(v,c) ({ u8 __v = v; __iowmb(); __arch_putb(__v,c); __v; })
#define writew(v,c) ({ u16 __v = v; __iowmb(); __arch_putw(__v,c); __v; })
#define writel(v,c) ({ u32 __v = v; __iowmb(); __arch_putl(__v,c); __v; })
diff --git a/arch/arm/include/asm/xen.h b/arch/arm/include/asm/xen.h
new file mode 100644
index 00000000000..8e2ee3d64ea
--- /dev/null
+++ b/arch/arm/include/asm/xen.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * (C) 2020 EPAM Systems Inc.
+ */
+
+extern unsigned long rom_pointer[];
+
diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h
new file mode 100644
index 00000000000..a4fd077079a
--- /dev/null
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ */
+
+#ifndef _ASM_ARM_XEN_HYPERCALL_H
+#define _ASM_ARM_XEN_HYPERCALL_H
+
+#include <xen/interface/xen.h>
+
+int HYPERVISOR_xen_version(int cmd, void *arg);
+int HYPERVISOR_console_io(int cmd, int count, char *str);
+int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+int HYPERVISOR_sched_op(int cmd, void *arg);
+int HYPERVISOR_event_channel_op(int cmd, void *arg);
+unsigned long HYPERVISOR_hvm_op(int op, void *arg);
+int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
+#endif /* _ASM_ARM_XEN_HYPERCALL_H */
diff --git a/arch/arm/include/asm/xen/system.h b/arch/arm/include/asm/xen/system.h
new file mode 100644
index 00000000000..0fc8a7995ca
--- /dev/null
+++ b/arch/arm/include/asm/xen/system.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * (C) 2014 Karim Allah Ahmed <karim.allah.ahmed@gmail.com>
+ * (C) 2020, EPAM Systems Inc.
+ */
+#ifndef _ASM_ARM_XEN_SYSTEM_H
+#define _ASM_ARM_XEN_SYSTEM_H
+
+#include <compiler.h>
+#include <asm/bitops.h>
+
+/* If *ptr == old, then store new there (and return new).
+ * Otherwise, return the old value.
+ * Atomic.
+ */
+#define synch_cmpxchg(ptr, old, new) \
+({ __typeof__(*ptr) stored = old; \
+ __atomic_compare_exchange_n(ptr, &stored, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) ? new : old; \
+})
+
+/* As test_and_clear_bit, but using __ATOMIC_SEQ_CST */
+static inline int synch_test_and_clear_bit(int nr, volatile void *addr)
+{
+ u8 *byte = ((u8 *)addr) + (nr >> 3);
+ u8 bit = 1 << (nr & 7);
+ u8 orig;
+
+ orig = __atomic_fetch_and(byte, ~bit, __ATOMIC_SEQ_CST);
+
+ return (orig & bit) != 0;
+}
+
+/* As test_and_set_bit, but using __ATOMIC_SEQ_CST */
+static inline int synch_test_and_set_bit(int nr, volatile void *base)
+{
+ u8 *byte = ((u8 *)base) + (nr >> 3);
+ u8 bit = 1 << (nr & 7);
+ u8 orig;
+
+ orig = __atomic_fetch_or(byte, bit, __ATOMIC_SEQ_CST);
+
+ return (orig & bit) != 0;
+}
+
+/* As set_bit, but using __ATOMIC_SEQ_CST */
+static inline void synch_set_bit(int nr, volatile void *addr)
+{
+ synch_test_and_set_bit(nr, addr);
+}
+
+/* As clear_bit, but using __ATOMIC_SEQ_CST */
+static inline void synch_clear_bit(int nr, volatile void *addr)
+{
+ synch_test_and_clear_bit(nr, addr);
+}
+
+/* As test_bit, but with a following memory barrier. */
+//static inline int synch_test_bit(int nr, volatile void *addr)
+static inline int synch_test_bit(int nr, const void *addr)
+{
+ int result;
+
+ result = test_bit(nr, addr);
+ barrier();
+ return result;
+}
+
+#define xchg(ptr, v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
+#define xchg(ptr, v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
+
+#define xen_mb() mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+
+#define to_phys(x) ((unsigned long)(x))
+#define to_virt(x) ((void *)(x))
+
+#define PFN_UP(x) (unsigned long)(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) (unsigned long)((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((unsigned long)(x) << PAGE_SHIFT)
+#define PHYS_PFN(x) (unsigned long)((x) >> PAGE_SHIFT)
+
+#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt)))
+#define virt_to_mfn(_virt) (PFN_DOWN(to_phys(_virt)))
+#define mfn_to_virt(_mfn) (to_virt(PFN_PHYS(_mfn)))
+#define pfn_to_virt(_pfn) (to_virt(PFN_PHYS(_pfn)))
+
+#endif
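The sequentially-consistent helpers above are what the event-channel and grant-table code added later in this series relies on when sharing pages with the hypervisor. A small, hypothetical sketch of the intended usage pattern:

#include <common.h>
#include <asm/xen/system.h>

/* Hypothetical bitmap of per-port "pending" flags. */
static unsigned long pending[8];

/* Producer side: mark a port as pending with SEQ_CST ordering. */
static void demo_raise(int port)
{
	synch_set_bit(port, pending);
}

/* Consumer side: atomically consume the pending bit, then show the frame
 * number the hypervisor would use to refer to the same buffer.
 */
static void demo_poll(int port)
{
	if (synch_test_and_clear_bit(port, pending))
		printf("event pending on port %d\n", port);

	printf("bitmap lives in pfn %lx\n", virt_to_pfn(pending));
}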
diff --git a/arch/arm/mach-aspeed/Makefile b/arch/arm/mach-aspeed/Makefile
index 1557dcae66d..33f65b50b28 100644
--- a/arch/arm/mach-aspeed/Makefile
+++ b/arch/arm/mach-aspeed/Makefile
@@ -3,4 +3,4 @@
# Copyright (c) 2016 Google, Inc
obj-$(CONFIG_ARCH_ASPEED) += ast_wdt.o
-obj-$(CONFIG_ASPEED_AST2500) += ast2500/ ast2500-board.o
+obj-$(CONFIG_ASPEED_AST2500) += ast2500/
diff --git a/arch/arm/mach-aspeed/ast2500/Makefile b/arch/arm/mach-aspeed/ast2500/Makefile
index a35b239ef35..4c27c8fc465 100644
--- a/arch/arm/mach-aspeed/ast2500/Makefile
+++ b/arch/arm/mach-aspeed/ast2500/Makefile
@@ -1 +1,3 @@
+obj-y += lowlevel_init.o
+obj-y += board_common.o
obj-y += clk_ast2500.o sdram_ast2500.o
diff --git a/arch/arm/mach-aspeed/ast2500-board.c b/arch/arm/mach-aspeed/ast2500/board_common.c
index f74dcbbb624..3482ee91efd 100644
--- a/arch/arm/mach-aspeed/ast2500-board.c
+++ b/arch/arm/mach-aspeed/ast2500/board_common.c
@@ -28,31 +28,6 @@
DECLARE_GLOBAL_DATA_PTR;
-void lowlevel_init(void)
-{
- /*
- * These two watchdogs need to be stopped as soon as possible,
- * otherwise the board might hang. By default they are set to
- * a very short timeout and even simple debug write to serial
- * console early in the init process might cause them to fire.
- */
- struct ast_wdt *flash_addr_wdt =
- (struct ast_wdt *)(WDT_BASE +
- sizeof(struct ast_wdt) *
- AST_FLASH_ADDR_DETECT_WDT);
-
- clrbits_le32(&flash_addr_wdt->ctrl, WDT_CTRL_EN);
-
-#ifndef CONFIG_FIRMWARE_2ND_BOOT
- struct ast_wdt *sec_boot_wdt =
- (struct ast_wdt *)(WDT_BASE +
- sizeof(struct ast_wdt) *
- AST_2ND_BOOT_WDT);
-
- clrbits_le32(&sec_boot_wdt->ctrl, WDT_CTRL_EN);
-#endif
-}
-
int board_init(void)
{
gd->bd->bi_boot_params = CONFIG_SYS_SDRAM_BASE + 0x100;
diff --git a/arch/arm/mach-aspeed/ast2500/lowlevel_init.S b/arch/arm/mach-aspeed/ast2500/lowlevel_init.S
new file mode 100644
index 00000000000..9ec3dd46b70
--- /dev/null
+++ b/arch/arm/mach-aspeed/ast2500/lowlevel_init.S
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) ASPEED Technology Inc.
+ */
+#include <asm/arch/scu_ast2500.h>
+
+/* registers for low level init */
+#define SCU_PROT_KEY 0x1e6e2000
+#define SCU_VGA_HANDSHAKE 0x1e6e2040
+#define SCU_HW_STRAP 0x1e6e2070
+#define SCU_HW_STRAP_CLR 0x1e6e207c
+#define WDT3_CTRL 0x1e78504c
+
+.global lowlevel_init
+lowlevel_init:
+
+ /* unlock SCU */
+ ldr r0, =SCU_PROT_KEY
+ ldr r1, =SCU_UNLOCK_VALUE
+ str r1, [r0]
+
+ /* set BMC FW as DRAM initializer */
+ ldr r0, =SCU_VGA_HANDSHAKE
+ ldr r1, [r0]
+ orr r1, #0x80
+ str r1, [r0]
+
+ /* set PERST# as LPC reset source if eSPI mode is enabled */
+ ldr r0, =SCU_HW_STRAP
+ ldr r1, [r0]
+ tst r1, #(0x1 << 25)
+ ldrne r0, =SCU_HW_STRAP_CLR
+ movne r1, #(0x1 << 14)
+ strne r1, [r0]
+
+ /* disable WDT3 for SPI 3/4 bytes auto-detection */
+ ldr r0, =WDT3_CTRL
+ mov r1, #0x0
+ str r1, [r0]
+
+ mov pc, lr
diff --git a/board/xen/xenguest_arm64/Kconfig b/board/xen/xenguest_arm64/Kconfig
new file mode 100644
index 00000000000..cc131ed5b96
--- /dev/null
+++ b/board/xen/xenguest_arm64/Kconfig
@@ -0,0 +1,12 @@
+if TARGET_XENGUEST_ARM64
+
+config SYS_BOARD
+ default "xenguest_arm64"
+
+config SYS_VENDOR
+ default "xen"
+
+config SYS_CONFIG_NAME
+ default "xenguest_arm64"
+
+endif
diff --git a/board/xen/xenguest_arm64/MAINTAINERS b/board/xen/xenguest_arm64/MAINTAINERS
new file mode 100644
index 00000000000..787e9e0d0e1
--- /dev/null
+++ b/board/xen/xenguest_arm64/MAINTAINERS
@@ -0,0 +1,7 @@
+XEN GUEST FOR ARM64
+M: Andrii Anisov <andrii_anisov@epam.com>
+S: Maintained
+F: board/xen/xenguest_arm64/
+F: doc/board/xen/
+F: include/configs/xenguest_arm64.h
+F: configs/xenguest_arm64_defconfig
diff --git a/board/xen/xenguest_arm64/Makefile b/board/xen/xenguest_arm64/Makefile
new file mode 100644
index 00000000000..1cf87a728f1
--- /dev/null
+++ b/board/xen/xenguest_arm64/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# (C) Copyright 2020 EPAM Systems Inc.
+
+obj-y := xenguest_arm64.o
diff --git a/board/xen/xenguest_arm64/xenguest_arm64.c b/board/xen/xenguest_arm64/xenguest_arm64.c
new file mode 100644
index 00000000000..cce54369bb3
--- /dev/null
+++ b/board/xen/xenguest_arm64/xenguest_arm64.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2013
+ * David Feng <fenghua@phytium.com.cn>
+ * Sharma Bhupesh <bhupesh.sharma@freescale.com>
+ *
+ * (C) 2020 EPAM Systems Inc
+ */
+
+#include <common.h>
+#include <cpu_func.h>
+#include <dm.h>
+#include <errno.h>
+#include <malloc.h>
+#include <xen.h>
+
+#include <asm/io.h>
+#include <asm/armv8/mmu.h>
+#include <asm/xen.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/system.h>
+
+#include <linux/compiler.h>
+
+#include <xen/gnttab.h>
+#include <xen/hvm.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+int board_init(void)
+{
+ return 0;
+}
+
+/*
+ * Use fdt provided by Xen: according to
+ * https://www.kernel.org/doc/Documentation/arm64/booting.txt
+ * x0 is the physical address of the device tree blob (dtb) in system RAM.
+ * This is stored in rom_pointer during low level init.
+ */
+void *board_fdt_blob_setup(void)
+{
+ if (fdt_magic(rom_pointer[0]) != FDT_MAGIC)
+ return NULL;
+ return (void *)rom_pointer[0];
+}
+
+#define MAX_MEM_MAP_REGIONS 5
+static struct mm_region xen_mem_map[MAX_MEM_MAP_REGIONS];
+struct mm_region *mem_map = xen_mem_map;
+
+static int get_next_memory_node(const void *blob, int mem)
+{
+ do {
+ mem = fdt_node_offset_by_prop_value(blob, mem,
+ "device_type", "memory", 7);
+ } while (!fdtdec_get_is_enabled(blob, mem));
+
+ return mem;
+}
+
+static int setup_mem_map(void)
+{
+ int i = 0, ret, mem, reg = 0;
+ struct fdt_resource res;
+ const void *blob = gd->fdt_blob;
+ u64 gfn;
+ phys_addr_t gnttab_base;
+ phys_size_t gnttab_sz;
+
+ /*
+ * Add "magic" region which is used by Xen to provide some essentials
+ * for the guest: we need console and xenstore.
+ */
+ ret = hvm_get_parameter_maintain_dcache(HVM_PARAM_CONSOLE_PFN, &gfn);
+ if (ret < 0) {
+ printf("%s: Can't get HVM_PARAM_CONSOLE_PFN, ret %d\n",
+ __func__, ret);
+ return -EINVAL;
+ }
+
+ xen_mem_map[i].virt = PFN_PHYS(gfn);
+ xen_mem_map[i].phys = PFN_PHYS(gfn);
+ xen_mem_map[i].size = PAGE_SIZE;
+ xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) |
+ PTE_BLOCK_INNER_SHARE);
+ i++;
+
+ ret = hvm_get_parameter_maintain_dcache(HVM_PARAM_STORE_PFN, &gfn);
+ if (ret < 0) {
+ printf("%s: Can't get HVM_PARAM_STORE_PFN, ret %d\n",
+ __func__, ret);
+ return -EINVAL;
+ }
+
+ xen_mem_map[i].virt = PFN_PHYS(gfn);
+ xen_mem_map[i].phys = PFN_PHYS(gfn);
+ xen_mem_map[i].size = PAGE_SIZE;
+ xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) |
+ PTE_BLOCK_INNER_SHARE);
+ i++;
+
+ /* Get Xen's suggested physical page assignments for the grant table. */
+ get_gnttab_base(&gnttab_base, &gnttab_sz);
+
+ xen_mem_map[i].virt = gnttab_base;
+ xen_mem_map[i].phys = gnttab_base;
+ xen_mem_map[i].size = gnttab_sz;
+ xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) |
+ PTE_BLOCK_INNER_SHARE);
+ i++;
+
+ mem = get_next_memory_node(blob, -1);
+ if (mem < 0) {
+ printf("%s: Missing /memory node\n", __func__);
+ return -EINVAL;
+ }
+
+ for (; i < MAX_MEM_MAP_REGIONS; i++) {
+ ret = fdt_get_resource(blob, mem, "reg", reg++, &res);
+ if (ret == -FDT_ERR_NOTFOUND) {
+ reg = 0;
+ mem = get_next_memory_node(blob, mem);
+ if (mem == -FDT_ERR_NOTFOUND)
+ break;
+
+ ret = fdt_get_resource(blob, mem, "reg", reg++, &res);
+ if (ret == -FDT_ERR_NOTFOUND)
+ break;
+ }
+ if (ret != 0) {
+ printf("No reg property for memory node\n");
+ return -EINVAL;
+ }
+
+ xen_mem_map[i].virt = (phys_addr_t)res.start;
+ xen_mem_map[i].phys = (phys_addr_t)res.start;
+ xen_mem_map[i].size = (phys_size_t)(res.end - res.start + 1);
+ xen_mem_map[i].attrs = (PTE_BLOCK_MEMTYPE(MT_NORMAL) |
+ PTE_BLOCK_INNER_SHARE);
+ }
+ return 0;
+}
+
+void enable_caches(void)
+{
+ /* Re-setup the memory map as BSS gets cleared after relocation. */
+ setup_mem_map();
+ icache_enable();
+ dcache_enable();
+}
+
+/* Read memory settings from the Xen provided device tree. */
+int dram_init(void)
+{
+ int ret;
+
+ ret = fdtdec_setup_mem_size_base();
+ if (ret < 0)
+ return ret;
+ /* Setup memory map, so MMU page table size can be estimated. */
+ return setup_mem_map();
+}
+
+int dram_init_banksize(void)
+{
+ return fdtdec_setup_memory_banksize();
+}
+
+/*
+ * Board specific reset that is system reset.
+ */
+void reset_cpu(ulong addr)
+{
+}
+
+int ft_system_setup(void *blob, struct bd_info *bd)
+{
+ return 0;
+}
+
+int ft_board_setup(void *blob, struct bd_info *bd)
+{
+ return 0;
+}
+
+int board_early_init_f(void)
+{
+ return 0;
+}
+
+int print_cpuinfo(void)
+{
+ printf("Xen virtual CPU\n");
+ return 0;
+}
+
+void board_cleanup_before_linux(void)
+{
+ xen_fini();
+}
+
diff --git a/cmd/Kconfig b/cmd/Kconfig
index 23d7e27dc8d..9ad511aa176 100644
--- a/cmd/Kconfig
+++ b/cmd/Kconfig
@@ -1370,6 +1370,13 @@ config CMD_USB_MASS_STORAGE
help
USB mass storage support
+config CMD_PVBLOCK
+ bool "Xen para-virtualized block device"
+ depends on XEN
+ select PVBLOCK
+ help
+ Xen para-virtualized block device support
+
config CMD_VIRTIO
bool "virtio"
depends on VIRTIO
diff --git a/cmd/Makefile b/cmd/Makefile
index ef2a22f9b12..3a9c9747c94 100644
--- a/cmd/Makefile
+++ b/cmd/Makefile
@@ -174,6 +174,7 @@ obj-$(CONFIG_CMD_DFU) += dfu.o
obj-$(CONFIG_CMD_GPT) += gpt.o
obj-$(CONFIG_CMD_ETHSW) += ethsw.o
obj-$(CONFIG_CMD_AXI) += axi.o
+obj-$(CONFIG_CMD_PVBLOCK) += pvblock.o
# Power
obj-$(CONFIG_CMD_PMIC) += pmic.o
diff --git a/cmd/demo.c b/cmd/demo.c
index f923533f794..7310aa2907a 100644
--- a/cmd/demo.c
+++ b/cmd/demo.c
@@ -130,5 +130,4 @@ U_BOOT_CMD(
"demo hello <num> [<char>] Say hello\n"
"demo light [<num>] Set or get the lights\n"
"demo status <num> Get demo device status\n"
- "demo list List available demo devices"
);
diff --git a/cmd/pvblock.c b/cmd/pvblock.c
new file mode 100644
index 00000000000..4e99b06122b
--- /dev/null
+++ b/cmd/pvblock.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * (C) Copyright 2020 EPAM Systems Inc.
+ *
+ * XEN para-virtualized block device support
+ */
+
+#include <blk.h>
+#include <common.h>
+#include <command.h>
+
+/* Current I/O Device */
+static int pvblock_curr_device;
+
+int do_pvblock(struct cmd_tbl *cmdtp, int flag, int argc, char *const argv[])
+{
+ return blk_common_cmd(argc, argv, IF_TYPE_PVBLOCK,
+ &pvblock_curr_device);
+}
+
+U_BOOT_CMD(pvblock, 5, 1, do_pvblock,
+ "Xen para-virtualized block device",
+ "info - show available block devices\n"
+ "pvblock device [dev] - show or set current device\n"
+ "pvblock part [dev] - print partition table of one or all devices\n"
+ "pvblock read addr blk# cnt\n"
+ "pvblock write addr blk# cnt - read/write `cnt'"
+ " blocks starting at block `blk#'\n"
+ " to/from memory address `addr'");
+
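Once the Xen toolstack assigns a para-virtualized disk to the domain, the command behaves like the other block-interface commands wired through blk_common_cmd(). A hypothetical session (device number, block numbers and the load address are examples only):

=> pvblock info
=> pvblock device 0
=> pvblock part
=> pvblock read 0x48000000 0 0x100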
diff --git a/common/board_r.c b/common/board_r.c
index d48d2bb8a04..9b2fec701a5 100644
--- a/common/board_r.c
+++ b/common/board_r.c
@@ -49,6 +49,7 @@
#include <nand.h>
#include <of_live.h>
#include <onenand_uboot.h>
+#include <pvblock.h>
#include <scsi.h>
#include <serial.h>
#include <status_led.h>
@@ -56,6 +57,9 @@
#include <timer.h>
#include <trace.h>
#include <watchdog.h>
+#ifdef CONFIG_XEN
+#include <xen.h>
+#endif
#ifdef CONFIG_ADDR_MAP
#include <asm/mmu.h>
#endif
@@ -465,6 +469,23 @@ static int initr_mmc(void)
}
#endif
+#ifdef CONFIG_XEN
+static int initr_xen(void)
+{
+ xen_init();
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PVBLOCK
+static int initr_pvblock(void)
+{
+ puts("PVBLOCK: ");
+ pvblock_init();
+ return 0;
+}
+#endif
+
/*
* Tell if it's OK to load the environment early in boot.
*
@@ -762,6 +783,12 @@ static init_fnc_t init_sequence_r[] = {
#ifdef CONFIG_MMC
initr_mmc,
#endif
+#ifdef CONFIG_XEN
+ initr_xen,
+#endif
+#ifdef CONFIG_PVBLOCK
+ initr_pvblock,
+#endif
initr_env,
#ifdef CONFIG_SYS_BOOTPARAMS_LEN
initr_malloc_bootparams,
diff --git a/configs/evb-ast2500_defconfig b/configs/evb-ast2500_defconfig
index 825fa4dae1d..105975c9e6c 100644
--- a/configs/evb-ast2500_defconfig
+++ b/configs/evb-ast2500_defconfig
@@ -11,6 +11,8 @@ CONFIG_PRE_CON_BUF_ADDR=0x1e720000
CONFIG_DEFAULT_DEVICE_TREE="ast2500-evb"
CONFIG_USE_BOOTARGS=y
CONFIG_BOOTARGS="console=ttyS4,115200n8 root=/dev/ram rw"
+CONFIG_USE_BOOTCOMMAND=y
+CONFIG_BOOTCOMMAND="bootm 20080000 20300000"
CONFIG_PRE_CONSOLE_BUFFER=y
# CONFIG_DISPLAY_CPUINFO is not set
CONFIG_HUSH_PARSER=y
@@ -20,7 +22,6 @@ CONFIG_CMD_MMC=y
CONFIG_CMD_DHCP=y
CONFIG_CMD_MII=y
CONFIG_CMD_PING=y
-CONFIG_OF_EMBED=y
CONFIG_ENV_OVERWRITE=y
CONFIG_SYS_RELOC_GD_ENV_ADDR=y
CONFIG_NET_RANDOM_ETHADDR=y
diff --git a/configs/xenguest_arm64_defconfig b/configs/xenguest_arm64_defconfig
new file mode 100644
index 00000000000..46473c251da
--- /dev/null
+++ b/configs/xenguest_arm64_defconfig
@@ -0,0 +1,60 @@
+CONFIG_ARM=y
+CONFIG_POSITION_INDEPENDENT=y
+CONFIG_SYS_TEXT_BASE=0x40080000
+CONFIG_SYS_MALLOC_F_LEN=0x2000
+CONFIG_IDENT_STRING=" xenguest"
+CONFIG_TARGET_XENGUEST_ARM64=y
+CONFIG_BOOTDELAY=10
+
+CONFIG_SYS_PROMPT="xenguest# "
+
+CONFIG_CMD_NET=n
+CONFIG_CMD_BDI=n
+CONFIG_CMD_BOOTD=n
+CONFIG_CMD_BOOTEFI=n
+CONFIG_CMD_BOOTEFI_HELLO_COMPILE=n
+CONFIG_CMD_ELF=n
+CONFIG_CMD_EXT4=y
+CONFIG_CMD_FAT=y
+CONFIG_CMD_GO=n
+CONFIG_CMD_RUN=n
+CONFIG_CMD_IMI=n
+CONFIG_CMD_IMLS=n
+CONFIG_CMD_XIMG=n
+CONFIG_CMD_EXPORTENV=n
+CONFIG_CMD_IMPORTENV=n
+CONFIG_CMD_EDITENV=n
+CONFIG_CMD_ENV_EXISTS=n
+CONFIG_CMD_MEMORY=y
+CONFIG_CMD_CRC32=n
+CONFIG_CMD_DM=n
+CONFIG_CMD_LOADB=n
+CONFIG_CMD_LOADS=n
+CONFIG_CMD_FLASH=n
+CONFIG_CMD_GPT=n
+CONFIG_CMD_FPGA=n
+CONFIG_CMD_ECHO=n
+CONFIG_CMD_ITEST=n
+CONFIG_CMD_SOURCE=n
+CONFIG_CMD_SETEXPR=n
+CONFIG_CMD_MISC=n
+CONFIG_CMD_UNZIP=n
+CONFIG_CMD_LZMADEC=n
+CONFIG_CMD_SAVEENV=n
+CONFIG_CMD_UMS=n
+
+CONFIG_CMD_PVBLOCK=y
+
+#CONFIG_USB=n
+# CONFIG_ISO_PARTITION is not set
+
+#CONFIG_EFI_PARTITION=y
+# CONFIG_EFI_LOADER is not set
+
+CONFIG_DM=y
+# CONFIG_MMC is not set
+CONFIG_DM_SERIAL=y
+# CONFIG_REQUIRE_SERIAL_CONSOLE is not set
+
+CONFIG_OF_BOARD=y
+CONFIG_OF_LIBFDT=y
diff --git a/disk/part.c b/disk/part.c
index f6a31025dc8..b69fd345f36 100644
--- a/disk/part.c
+++ b/disk/part.c
@@ -149,6 +149,7 @@ void dev_print (struct blk_desc *dev_desc)
case IF_TYPE_MMC:
case IF_TYPE_USB:
case IF_TYPE_NVME:
+ case IF_TYPE_PVBLOCK:
printf ("Vendor: %s Rev: %s Prod: %s\n",
dev_desc->vendor,
dev_desc->revision,
@@ -288,6 +289,9 @@ static void print_part_header(const char *type, struct blk_desc *dev_desc)
case IF_TYPE_NVME:
puts ("NVMe");
break;
+ case IF_TYPE_PVBLOCK:
+ puts("PV BLOCK");
+ break;
case IF_TYPE_VIRTIO:
puts("VirtIO");
break;
diff --git a/doc/board/index.rst b/doc/board/index.rst
index 0a15899180f..63935abcd79 100644
--- a/doc/board/index.rst
+++ b/doc/board/index.rst
@@ -22,4 +22,5 @@ Board-specific doc
st/index
tbs/index
toradex/index
+ xen/index
xilinx/index
diff --git a/doc/board/xen/index.rst b/doc/board/xen/index.rst
new file mode 100644
index 00000000000..e58fe9e3512
--- /dev/null
+++ b/doc/board/xen/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+XenGuestARM64
+=============
+
+.. toctree::
+ :maxdepth: 2
+
+ xenguest_arm64
diff --git a/doc/board/xen/xenguest_arm64.rst b/doc/board/xen/xenguest_arm64.rst
new file mode 100644
index 00000000000..1327f88f990
--- /dev/null
+++ b/doc/board/xen/xenguest_arm64.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Xen guest ARM64 board
+=====================
+
+This board specification
+------------------------
+
+This board is to be run as a virtual Xen [1] guest with U-Boot as its primary
+bootloader. Xen is a type 1 hypervisor that allows multiple operating systems
+to run simultaneously on a single physical server. Xen is capable of running
+virtual machines in both full virtualization and para-virtualization (PV)
+modes. Xen runs virtual machines, which are called “domains”.
+
+Paravirtualized drivers are a special type of device driver that is used in
+a guest system in the Xen domain and perform I/O operations using a special
+interface provided by the virtualization system and the host system.
+
+Xen support for U-Boot is implemented by introducing a new Xen guest ARM64
+board and porting essential drivers from MiniOS [3] as well as some of the work
+previously done by NXP [4]:
+
+- PV block device frontend driver with XenStore based device enumeration and
+ UCLASS_PVBLOCK class;
+- PV serial console device frontend driver;
+- Xen hypervisor support with minimal set of the essential headers adapted from
+ the Linux kernel;
+- Xen grant table support;
+- Xen event channel support in polling mode;
+- XenBus support;
+- dynamic RAM size as defined in the device tree instead of the statically
+ defined values;
+- position-independent pre-relocation code is used as we cannot statically
+ define any start addresses at compile time; these are chosen by Xen at
+ run-time;
+- new defconfig introduced: xenguest_arm64_defconfig.
+
+
+Board limitations
+-----------------
+
+1. U-Boot runs without the MMU enabled at the early stages.
+ According to Xen on ARM ABI (xen/include/public/arch-arm.h): all memory
+ which is shared with other entities in the system (including the hypervisor
+ and other guests) must reside in memory which is mapped as Normal Inner
+ Write-Back Outer Write-Back Inner-Shareable.
+ Thus, page attributes must be equally set for all the entities working with
+ that page.
+ Before the MMU is set up the data cache is turned off and pages are seen by
+ the vCPU and Xen in different ways - cacheable by Xen and non-cacheable by the vCPU.
+ This means that manual data cache maintenance is required at the early
+ stages.
+
+2. No serial console until MMU is up.
+ Because data cache maintenance is required until the MMU setup the
+ early/debug serial console is not implemented. Therefore, we do not have
+ usual prints like U-boot’s banner etc. until the serial driver is
+ initialized.
+
+3. Single RAM bank supported.
+ If a Xen guest is given much memory it is possible that Xen allocates two
+ memory banks for it. The first one is allocated under 4GB address space and
+ in some cases may represent the whole guest’s memory. It is assumed that
+ U-boot most likely won’t require high memory bank for its work andlaunching
+ OS, so it is enough to take the first one.
+
+
+Board default configuration
+---------------------------
+
+One can select the configuration as follows:
+
+ - make xenguest_arm64_defconfig
+
+[1] - https://xenproject.org/
+
+[2] - https://wiki.xenproject.org/wiki/Paravirtualization_(PV)
+
+[3] - https://wiki.xenproject.org/wiki/Mini-OS
+
+[4] - https://source.codeaurora.org/external/imx/uboot-imx/tree/?h=imx_v2018.03_4.14.98_2.0.0_ga
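As a usage sketch for the documentation above: the resulting u-boot.bin is typically handed to Xen as the guest kernel, with the PV disk that the pvblock driver enumerates defined in the domain configuration. A hypothetical xl configuration fragment (all names, paths and sizes are examples only):

name   = "uboot-guest"
kernel = "/srv/xen/u-boot.bin"
memory = 256
vcpus  = 1
disk   = [ 'format=raw, vdev=xvda, access=rw, target=/srv/xen/disk.img' ]

The guest would then be started with "xl create <config file>" and observed via "xl console <name>" once the PV serial driver is up.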
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 119e412849f..613669cb381 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -136,6 +136,8 @@ source "drivers/w1-eeprom/Kconfig"
source "drivers/watchdog/Kconfig"
+source "drivers/xen/Kconfig"
+
config PHYS_TO_BUS
bool "Custom physical to bus address mapping"
help
diff --git a/drivers/Makefile b/drivers/Makefile
index 2178871bfb5..33126b2da7b 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_$(SPL_)REMOTEPROC) += remoteproc/
obj-$(CONFIG_$(SPL_TPL_)TPM) += tpm/
obj-$(CONFIG_$(SPL_TPL_)ACPI_PMC) += power/acpi_pmc/
obj-$(CONFIG_$(SPL_)BOARD) += board/
+obj-$(CONFIG_XEN) += xen/
ifndef CONFIG_TPL_BUILD
ifdef CONFIG_SPL_BUILD
diff --git a/drivers/block/blk-uclass.c b/drivers/block/blk-uclass.c
index b46a1ac8d21..2fb9f6b765e 100644
--- a/drivers/block/blk-uclass.c
+++ b/drivers/block/blk-uclass.c
@@ -28,6 +28,7 @@ static const char *if_typename_str[IF_TYPE_COUNT] = {
[IF_TYPE_NVME] = "nvme",
[IF_TYPE_EFI] = "efi",
[IF_TYPE_VIRTIO] = "virtio",
+ [IF_TYPE_PVBLOCK] = "pvblock",
};
static enum uclass_id if_type_uclass_id[IF_TYPE_COUNT] = {
@@ -43,6 +44,7 @@ static enum uclass_id if_type_uclass_id[IF_TYPE_COUNT] = {
[IF_TYPE_NVME] = UCLASS_NVME,
[IF_TYPE_EFI] = UCLASS_EFI,
[IF_TYPE_VIRTIO] = UCLASS_VIRTIO,
+ [IF_TYPE_PVBLOCK] = UCLASS_PVBLOCK,
};
static enum if_type if_typename_to_iftype(const char *if_typename)
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 91065e67f1b..5e0a39396bb 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -191,6 +191,13 @@ config PCIE_INTEL_FPGA
Say Y here if you want to enable PCIe controller support on Intel
FPGA, example Stratix 10.
+config PCIE_IPROC
+ bool "Iproc PCIe support"
+ depends on DM_PCI
+ help
+ Broadcom iProc PCIe controller driver.
+ Say Y here if you want to enable the Broadcom iProc PCIe controller.
+
config PCI_MVEBU
bool "Enable Armada XP/38x PCIe driver"
depends on ARCH_MVEBU
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 9faebffa488..9db90fb53c5 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_SH4_PCI) += pci_sh4.o
obj-$(CONFIG_SH7751_PCI) +=pci_sh7751.o
obj-$(CONFIG_SH7780_PCI) +=pci_sh7780.o
obj-$(CONFIG_PCI_TEGRA) += pci_tegra.o
+obj-$(CONFIG_PCIE_IPROC) += pcie_iproc.o
obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
obj-$(CONFIG_PCIE_DW_MVEBU) += pcie_dw_mvebu.o
obj-$(CONFIG_PCIE_FSL) += pcie_fsl.o pcie_fsl_fixup.o
diff --git a/drivers/pci/pci-uclass.c b/drivers/pci/pci-uclass.c
index 834526c5a47..40cc9f1090e 100644
--- a/drivers/pci/pci-uclass.c
+++ b/drivers/pci/pci-uclass.c
@@ -1179,6 +1179,48 @@ ulong pci_conv_size_to_32(ulong old, ulong value, uint offset,
return value;
}
+int pci_get_dma_regions(struct udevice *dev, struct pci_region *memp, int index)
+{
+ int pci_addr_cells, addr_cells, size_cells;
+ int cells_per_record;
+ const u32 *prop;
+ int len;
+ int i = 0;
+
+ prop = ofnode_get_property(dev_ofnode(dev), "dma-ranges", &len);
+ if (!prop) {
+ log_err("PCI: Device '%s': Cannot decode dma-ranges\n",
+ dev->name);
+ return -EINVAL;
+ }
+
+ pci_addr_cells = ofnode_read_simple_addr_cells(dev_ofnode(dev));
+ addr_cells = ofnode_read_simple_addr_cells(dev_ofnode(dev->parent));
+ size_cells = ofnode_read_simple_size_cells(dev_ofnode(dev));
+
+ /* PCI addresses are always 3-cells */
+ len /= sizeof(u32);
+ cells_per_record = pci_addr_cells + addr_cells + size_cells;
+ debug("%s: len=%d, cells_per_record=%d\n", __func__, len,
+ cells_per_record);
+
+ while (len) {
+ memp->bus_start = fdtdec_get_number(prop + 1, 2);
+ prop += pci_addr_cells;
+ memp->phys_start = fdtdec_get_number(prop, addr_cells);
+ prop += addr_cells;
+ memp->size = fdtdec_get_number(prop, size_cells);
+ prop += size_cells;
+
+ if (i == index)
+ return 0;
+ i++;
+ len -= cells_per_record;
+ }
+
+ return -EINVAL;
+}
+
int pci_get_regions(struct udevice *dev, struct pci_region **iop,
struct pci_region **memp, struct pci_region **prefp)
{
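The pci_get_dma_regions() helper added above walks the controller's "dma-ranges" property one record per call; a hypothetical caller (for example a controller driver sizing its inbound windows) iterates indices until the helper reports no more records:

#include <common.h>
#include <dm.h>
#include <pci.h>

/* Hypothetical debug helper: dump every dma-ranges record of a host bridge. */
static void show_dma_regions(struct udevice *ctlr)
{
	struct pci_region region;
	int i;

	for (i = 0; pci_get_dma_regions(ctlr, &region, i) == 0; i++)
		printf("dma-range %d: bus 0x%llx -> cpu 0x%llx, size 0x%llx\n",
		       i, (unsigned long long)region.bus_start,
		       (unsigned long long)region.phys_start,
		       (unsigned long long)region.size);
}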
diff --git a/drivers/pci/pcie_iproc.c b/drivers/pci/pcie_iproc.c
new file mode 100644
index 00000000000..d77735fcf26
--- /dev/null
+++ b/drivers/pci/pcie_iproc.c
@@ -0,0 +1,1287 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Broadcom
+ *
+ */
+
+#include <common.h>
+#include <dm.h>
+#include <errno.h>
+#include <generic-phy.h>
+#include <pci.h>
+#include <malloc.h>
+#include <asm/io.h>
+#include <dm/device_compat.h>
+#include <linux/log2.h>
+
+#define EP_PERST_SOURCE_SELECT_SHIFT 2
+#define EP_PERST_SOURCE_SELECT BIT(EP_PERST_SOURCE_SELECT_SHIFT)
+#define EP_MODE_SURVIVE_PERST_SHIFT 1
+#define EP_MODE_SURVIVE_PERST BIT(EP_MODE_SURVIVE_PERST_SHIFT)
+#define RC_PCIE_RST_OUTPUT_SHIFT 0
+#define RC_PCIE_RST_OUTPUT BIT(RC_PCIE_RST_OUTPUT_SHIFT)
+
+#define CFG_IND_ADDR_MASK 0x00001ffc
+
+#define CFG_ADDR_BUS_NUM_SHIFT 20
+#define CFG_ADDR_BUS_NUM_MASK 0x0ff00000
+#define CFG_ADDR_DEV_NUM_SHIFT 15
+#define CFG_ADDR_DEV_NUM_MASK 0x000f8000
+#define CFG_ADDR_FUNC_NUM_SHIFT 12
+#define CFG_ADDR_FUNC_NUM_MASK 0x00007000
+#define CFG_ADDR_REG_NUM_SHIFT 2
+#define CFG_ADDR_REG_NUM_MASK 0x00000ffc
+#define CFG_ADDR_CFG_TYPE_SHIFT 0
+#define CFG_ADDR_CFG_TYPE_MASK 0x00000003
+
+#define IPROC_PCI_PM_CAP 0x48
+#define IPROC_PCI_PM_CAP_MASK 0xffff
+#define IPROC_PCI_EXP_CAP 0xac
+
+#define IPROC_PCIE_REG_INVALID 0xffff
+
+#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */
+#define PCI_EXP_RTCTL 28 /* Root Control */
+/* CRS Software Visibility capability */
+#define PCI_EXP_RTCAP_CRSVIS 0x0001
+
+#define PCI_EXP_LNKSTA 18 /* Link Status */
+#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */
+
+#define PCIE_PHYLINKUP_SHIFT 3
+#define PCIE_PHYLINKUP BIT(PCIE_PHYLINKUP_SHIFT)
+#define PCIE_DL_ACTIVE_SHIFT 2
+#define PCIE_DL_ACTIVE BIT(PCIE_DL_ACTIVE_SHIFT)
+
+/* derive the enum index of the outbound/inbound mapping registers */
+#define MAP_REG(base_reg, index) ((base_reg) + (index) * 2)
+
+/*
+ * Maximum number of outbound mapping window sizes that can be supported by any
+ * OARR/OMAP mapping pair
+ */
+#define MAX_NUM_OB_WINDOW_SIZES 4
+
+#define OARR_VALID_SHIFT 0
+#define OARR_VALID BIT(OARR_VALID_SHIFT)
+#define OARR_SIZE_CFG_SHIFT 1
+
+/*
+ * Maximum number of inbound mapping region sizes that can be supported by an
+ * IARR
+ */
+#define MAX_NUM_IB_REGION_SIZES 9
+
+#define IMAP_VALID_SHIFT 0
+#define IMAP_VALID BIT(IMAP_VALID_SHIFT)
+
+#define APB_ERR_EN_SHIFT 0
+#define APB_ERR_EN BIT(APB_ERR_EN_SHIFT)
+
+/**
+ * iProc PCIe host registers
+ */
+enum iproc_pcie_reg {
+ /* clock/reset signal control */
+ IPROC_PCIE_CLK_CTRL = 0,
+
+ /*
+ * To allow MSI to be steered to an external MSI controller (e.g., ARM
+ * GICv3 ITS)
+ */
+ IPROC_PCIE_MSI_GIC_MODE,
+
+ /*
+ * IPROC_PCIE_MSI_BASE_ADDR and IPROC_PCIE_MSI_WINDOW_SIZE define the
+ * window where the MSI posted writes are written, for the writes to be
+ * interpreted as MSI writes.
+ */
+ IPROC_PCIE_MSI_BASE_ADDR,
+ IPROC_PCIE_MSI_WINDOW_SIZE,
+
+ /*
+ * To hold the address of the register where the MSI writes are
+ * programed. When ARM GICv3 ITS is used, this should be programmed
+ * with the address of the GITS_TRANSLATER register.
+ */
+ IPROC_PCIE_MSI_ADDR_LO,
+ IPROC_PCIE_MSI_ADDR_HI,
+
+ /* enable MSI */
+ IPROC_PCIE_MSI_EN_CFG,
+
+ /* allow access to root complex configuration space */
+ IPROC_PCIE_CFG_IND_ADDR,
+ IPROC_PCIE_CFG_IND_DATA,
+
+ /* allow access to device configuration space */
+ IPROC_PCIE_CFG_ADDR,
+ IPROC_PCIE_CFG_DATA,
+
+ /* enable INTx */
+ IPROC_PCIE_INTX_EN,
+ IPROC_PCIE_INTX_CSR,
+
+ /* outbound address mapping */
+ IPROC_PCIE_OARR0,
+ IPROC_PCIE_OMAP0,
+ IPROC_PCIE_OARR1,
+ IPROC_PCIE_OMAP1,
+ IPROC_PCIE_OARR2,
+ IPROC_PCIE_OMAP2,
+ IPROC_PCIE_OARR3,
+ IPROC_PCIE_OMAP3,
+
+ /* inbound address mapping */
+ IPROC_PCIE_IARR0,
+ IPROC_PCIE_IMAP0,
+ IPROC_PCIE_IARR1,
+ IPROC_PCIE_IMAP1,
+ IPROC_PCIE_IARR2,
+ IPROC_PCIE_IMAP2,
+ IPROC_PCIE_IARR3,
+ IPROC_PCIE_IMAP3,
+ IPROC_PCIE_IARR4,
+ IPROC_PCIE_IMAP4,
+
+ /* config read status */
+ IPROC_PCIE_CFG_RD_STATUS,
+
+ /* link status */
+ IPROC_PCIE_LINK_STATUS,
+
+ /* enable APB error for unsupported requests */
+ IPROC_PCIE_APB_ERR_EN,
+
+ /* Ordering Mode configuration registers */
+ IPROC_PCIE_ORDERING_CFG,
+ IPROC_PCIE_IMAP0_RO_CONTROL,
+ IPROC_PCIE_IMAP1_RO_CONTROL,
+ IPROC_PCIE_IMAP2_RO_CONTROL,
+ IPROC_PCIE_IMAP3_RO_CONTROL,
+ IPROC_PCIE_IMAP4_RO_CONTROL,
+
+ /* total number of core registers */
+ IPROC_PCIE_MAX_NUM_REG,
+};
+
+/* iProc PCIe PAXB v2 registers */
+static const u16 iproc_pcie_reg_paxb_v2[] = {
+ [IPROC_PCIE_CLK_CTRL] = 0x000,
+ [IPROC_PCIE_CFG_IND_ADDR] = 0x120,
+ [IPROC_PCIE_CFG_IND_DATA] = 0x124,
+ [IPROC_PCIE_CFG_ADDR] = 0x1f8,
+ [IPROC_PCIE_CFG_DATA] = 0x1fc,
+ [IPROC_PCIE_INTX_EN] = 0x330,
+ [IPROC_PCIE_INTX_CSR] = 0x334,
+ [IPROC_PCIE_OARR0] = 0xd20,
+ [IPROC_PCIE_OMAP0] = 0xd40,
+ [IPROC_PCIE_OARR1] = 0xd28,
+ [IPROC_PCIE_OMAP1] = 0xd48,
+ [IPROC_PCIE_OARR2] = 0xd60,
+ [IPROC_PCIE_OMAP2] = 0xd68,
+ [IPROC_PCIE_OARR3] = 0xdf0,
+ [IPROC_PCIE_OMAP3] = 0xdf8,
+ [IPROC_PCIE_IARR0] = 0xd00,
+ [IPROC_PCIE_IMAP0] = 0xc00,
+ [IPROC_PCIE_IARR2] = 0xd10,
+ [IPROC_PCIE_IMAP2] = 0xcc0,
+ [IPROC_PCIE_IARR3] = 0xe00,
+ [IPROC_PCIE_IMAP3] = 0xe08,
+ [IPROC_PCIE_IARR4] = 0xe68,
+ [IPROC_PCIE_IMAP4] = 0xe70,
+ [IPROC_PCIE_CFG_RD_STATUS] = 0xee0,
+ [IPROC_PCIE_LINK_STATUS] = 0xf0c,
+ [IPROC_PCIE_APB_ERR_EN] = 0xf40,
+ [IPROC_PCIE_ORDERING_CFG] = 0x2000,
+ [IPROC_PCIE_IMAP0_RO_CONTROL] = 0x201c,
+ [IPROC_PCIE_IMAP1_RO_CONTROL] = 0x2020,
+ [IPROC_PCIE_IMAP2_RO_CONTROL] = 0x2024,
+ [IPROC_PCIE_IMAP3_RO_CONTROL] = 0x2028,
+ [IPROC_PCIE_IMAP4_RO_CONTROL] = 0x202c,
+};
+
+/* iProc PCIe PAXC v2 registers */
+static const u16 iproc_pcie_reg_paxc_v2[] = {
+ [IPROC_PCIE_MSI_GIC_MODE] = 0x050,
+ [IPROC_PCIE_MSI_BASE_ADDR] = 0x074,
+ [IPROC_PCIE_MSI_WINDOW_SIZE] = 0x078,
+ [IPROC_PCIE_MSI_ADDR_LO] = 0x07c,
+ [IPROC_PCIE_MSI_ADDR_HI] = 0x080,
+ [IPROC_PCIE_MSI_EN_CFG] = 0x09c,
+ [IPROC_PCIE_CFG_IND_ADDR] = 0x1f0,
+ [IPROC_PCIE_CFG_IND_DATA] = 0x1f4,
+ [IPROC_PCIE_CFG_ADDR] = 0x1f8,
+ [IPROC_PCIE_CFG_DATA] = 0x1fc,
+};
+
+/**
+ * List of device IDs of controllers that have corrupted
+ * capability list that require SW fixup
+ */
+static const u16 iproc_pcie_corrupt_cap_did[] = {
+ 0x16cd,
+ 0x16f0,
+ 0xd802,
+ 0xd804
+};
+
+enum iproc_pcie_type {
+ IPROC_PCIE_PAXB_V2,
+ IPROC_PCIE_PAXC,
+ IPROC_PCIE_PAXC_V2,
+};
+
+/**
+ * struct iproc_pcie_ob - iProc PCIe outbound mapping
+ *
+ * @axi_offset: offset from the AXI address to the internal address used by
+ * the iProc PCIe core
+ * @nr_windows: total number of supported outbound mapping windows
+ */
+struct iproc_pcie_ob {
+ resource_size_t axi_offset;
+ unsigned int nr_windows;
+};
+
+/**
+ * struct iproc_pcie_ib - iProc PCIe inbound mapping
+ *
+ * @nr_regions: total number of supported inbound mapping regions
+ */
+struct iproc_pcie_ib {
+ unsigned int nr_regions;
+};
+
+/**
+ * struct iproc_pcie_ob_map - outbound mapping controller specific parameters
+ *
+ * @window_sizes: list of supported outbound mapping window sizes in MB
+ * @nr_sizes: number of supported outbound mapping window sizes
+ */
+struct iproc_pcie_ob_map {
+ resource_size_t window_sizes[MAX_NUM_OB_WINDOW_SIZES];
+ unsigned int nr_sizes;
+};
+
+static const struct iproc_pcie_ob_map paxb_v2_ob_map[] = {
+ {
+ /* OARR0/OMAP0 */
+ .window_sizes = { 128, 256 },
+ .nr_sizes = 2,
+ },
+ {
+ /* OARR1/OMAP1 */
+ .window_sizes = { 128, 256 },
+ .nr_sizes = 2,
+ },
+ {
+ /* OARR2/OMAP2 */
+ .window_sizes = { 128, 256, 512, 1024 },
+ .nr_sizes = 4,
+ },
+ {
+ /* OARR3/OMAP3 */
+ .window_sizes = { 128, 256, 512, 1024 },
+ .nr_sizes = 4,
+ },
+};
+
+/**
+ * iProc PCIe inbound mapping type
+ */
+enum iproc_pcie_ib_map_type {
+ /* for DDR memory */
+ IPROC_PCIE_IB_MAP_MEM = 0,
+
+ /* for device I/O memory */
+ IPROC_PCIE_IB_MAP_IO,
+
+ /* invalid or unused */
+ IPROC_PCIE_IB_MAP_INVALID
+};
+
+/**
+ * struct iproc_pcie_ib_map - inbound mapping controller specific parameters
+ *
+ * @type: inbound mapping region type
+ * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or SZ_1G
+ * @region_sizes: list of supported inbound mapping region sizes in KB, MB, or
+ * GB, depending on the size unit
+ * @nr_sizes: number of supported inbound mapping region sizes
+ * @nr_windows: number of supported inbound mapping windows for the region
+ * @imap_addr_offset: register offset between the upper and lower 32-bit
+ * IMAP address registers
+ * @imap_window_offset: register offset between each IMAP window
+ */
+struct iproc_pcie_ib_map {
+ enum iproc_pcie_ib_map_type type;
+ unsigned int size_unit;
+ resource_size_t region_sizes[MAX_NUM_IB_REGION_SIZES];
+ unsigned int nr_sizes;
+ unsigned int nr_windows;
+ u16 imap_addr_offset;
+ u16 imap_window_offset;
+};
+
+static const struct iproc_pcie_ib_map paxb_v2_ib_map[] = {
+ {
+ /* IARR0/IMAP0 */
+ .type = IPROC_PCIE_IB_MAP_IO,
+ .size_unit = SZ_1K,
+ .region_sizes = { 32 },
+ .nr_sizes = 1,
+ .nr_windows = 8,
+ .imap_addr_offset = 0x40,
+ .imap_window_offset = 0x4,
+ },
+ {
+ /* IARR1/IMAP1 (currently unused) */
+ .type = IPROC_PCIE_IB_MAP_INVALID,
+ },
+ {
+ /* IARR2/IMAP2 */
+ .type = IPROC_PCIE_IB_MAP_MEM,
+ .size_unit = SZ_1M,
+ .region_sizes = { 64, 128, 256, 512, 1024, 2048, 4096, 8192,
+ 16384 },
+ .nr_sizes = 9,
+ .nr_windows = 1,
+ .imap_addr_offset = 0x4,
+ .imap_window_offset = 0x8,
+ },
+ {
+ /* IARR3/IMAP3 */
+ .type = IPROC_PCIE_IB_MAP_MEM,
+ .size_unit = SZ_1G,
+ .region_sizes = { 1, 2, 4, 8, 16, 32 },
+ .nr_sizes = 6,
+ .nr_windows = 8,
+ .imap_addr_offset = 0x4,
+ .imap_window_offset = 0x8,
+ },
+ {
+ /* IARR4/IMAP4 */
+ .type = IPROC_PCIE_IB_MAP_MEM,
+ .size_unit = SZ_1G,
+ .region_sizes = { 32, 64, 128, 256, 512 },
+ .nr_sizes = 5,
+ .nr_windows = 8,
+ .imap_addr_offset = 0x4,
+ .imap_window_offset = 0x8,
+ },
+};
+
+/**
+ * struct iproc_pcie - iproc pcie device instance
+ *
+ * @dev: pointer to pcie udevice
+ * @base: device I/O base address
+ * @type: pci device type, PAXC or PAXB
+ * @reg_offsets: pointer to pcie host register
+ * @fix_paxc_cap: paxc capability
+ * @need_ob_cfg: outbound mapping status
+ * @ob: pcie outbound mapping
+ * @ob_map: pointer to outbound mapping parameters
+ * @need_ib_cfg: inbound mapping status
+ * @ib: pcie inbound mapping
+ * @ib_map: pointer to inbound mapping parameters
+ * @ep_is_internal: ep status
+ * @phy: phy device
+ * @link_is_active: link up status
+ * @has_apb_err_disable: apb error status
+ */
+struct iproc_pcie {
+ struct udevice *dev;
+ void __iomem *base;
+ enum iproc_pcie_type type;
+ u16 *reg_offsets;
+ bool fix_paxc_cap;
+ bool need_ob_cfg;
+ struct iproc_pcie_ob ob;
+ const struct iproc_pcie_ob_map *ob_map;
+ bool need_ib_cfg;
+ struct iproc_pcie_ib ib;
+ const struct iproc_pcie_ib_map *ib_map;
+ bool ep_is_internal;
+ struct phy phy;
+ bool link_is_active;
+ bool has_apb_err_disable;
+};
+
+static inline bool iproc_pcie_reg_is_invalid(u16 reg_offset)
+{
+ return !!(reg_offset == IPROC_PCIE_REG_INVALID);
+}
+
+static inline u16 iproc_pcie_reg_offset(struct iproc_pcie *pcie,
+ enum iproc_pcie_reg reg)
+{
+ return pcie->reg_offsets[reg];
+}
+
+static inline u32 iproc_pcie_read_reg(struct iproc_pcie *pcie,
+ enum iproc_pcie_reg reg)
+{
+ u16 offset = iproc_pcie_reg_offset(pcie, reg);
+
+ if (iproc_pcie_reg_is_invalid(offset))
+ return 0;
+
+ return readl(pcie->base + offset);
+}
+
+static inline void iproc_pcie_write_reg(struct iproc_pcie *pcie,
+ enum iproc_pcie_reg reg, u32 val)
+{
+ u16 offset = iproc_pcie_reg_offset(pcie, reg);
+
+ if (iproc_pcie_reg_is_invalid(offset))
+ return;
+
+ writel(val, pcie->base + offset);
+}
+
+static int iproc_pcie_map_ep_cfg_reg(const struct udevice *udev, pci_dev_t bdf,
+ uint where, void **paddress)
+{
+ struct iproc_pcie *pcie = dev_get_priv(udev);
+ unsigned int busno = PCI_BUS(bdf);
+ unsigned int slot = PCI_DEV(bdf);
+ unsigned int fn = PCI_FUNC(bdf);
+
+ u16 offset;
+ u32 val;
+
+ /* root complex access */
+ if (busno == 0) {
+ if (slot > 0 || fn > 0)
+ return -ENODEV;
+
+ iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_IND_ADDR,
+ where & CFG_IND_ADDR_MASK);
+ offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_IND_DATA);
+ if (iproc_pcie_reg_is_invalid(offset))
+ return -ENODEV;
+
+ *paddress = (pcie->base + offset);
+ return 0;
+ }
+
+ if (!pcie->link_is_active)
+ return -ENODEV;
+
+ /* EP device access */
+ val = (busno << CFG_ADDR_BUS_NUM_SHIFT) |
+ (slot << CFG_ADDR_DEV_NUM_SHIFT) |
+ (fn << CFG_ADDR_FUNC_NUM_SHIFT) |
+ (where & CFG_ADDR_REG_NUM_MASK) |
+ (1 & CFG_ADDR_CFG_TYPE_MASK);
+
+ iproc_pcie_write_reg(pcie, IPROC_PCIE_CFG_ADDR, val);
+ offset = iproc_pcie_reg_offset(pcie, IPROC_PCIE_CFG_DATA);
+
+ if (iproc_pcie_reg_is_invalid(offset))
+ return -ENODEV;
+
+ *paddress = (pcie->base + offset);
+
+ return 0;
+}
+
+static void iproc_pcie_fix_cap(struct iproc_pcie *pcie, int where, ulong *val)
+{
+ u32 i, dev_id;
+
+ switch (where & ~0x3) {
+ case PCI_VENDOR_ID:
+ dev_id = *val >> 16;
+
+ /*
+ * Activate fixup for those controllers that have corrupted
+ * capability list registers
+ */
+ for (i = 0; i < ARRAY_SIZE(iproc_pcie_corrupt_cap_did); i++)
+ if (dev_id == iproc_pcie_corrupt_cap_did[i])
+ pcie->fix_paxc_cap = true;
+ break;
+
+ case IPROC_PCI_PM_CAP:
+ if (pcie->fix_paxc_cap) {
+ /* advertise PM, force next capability to PCIe */
+ *val &= ~IPROC_PCI_PM_CAP_MASK;
+ *val |= IPROC_PCI_EXP_CAP << 8 | PCI_CAP_ID_PM;
+ }
+ break;
+
+ case IPROC_PCI_EXP_CAP:
+ if (pcie->fix_paxc_cap) {
+ /* advertise root port, version 2, terminate here */
+ *val = (PCI_EXP_TYPE_ROOT_PORT << 4 | 2) << 16 |
+ PCI_CAP_ID_EXP;
+ }
+ break;
+
+ case IPROC_PCI_EXP_CAP + PCI_EXP_RTCTL:
+ /* Don't advertise CRS SV support */
+ *val &= ~(PCI_EXP_RTCAP_CRSVIS << 16);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie,
+ unsigned int devfn, int where,
+ int size, u32 *val)
+{
+ void __iomem *addr;
+ int ret;
+
+ ret = iproc_pcie_map_ep_cfg_reg(pcie->dev, devfn, where & ~0x3, &addr);
+ if (ret) {
+ *val = ~0;
+ return -EINVAL;
+ }
+
+ *val = readl(addr);
+
+ if (size <= 2)
+ *val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+ return 0;
+}
+
+static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie,
+ unsigned int devfn, int where,
+ int size, u32 val)
+{
+ void __iomem *addr;
+ int ret;
+ u32 mask, tmp;
+
+ ret = iproc_pcie_map_ep_cfg_reg(pcie->dev, devfn, where & ~0x3, &addr);
+ if (ret)
+ return -EINVAL;
+
+ if (size == 4) {
+ writel(val, addr);
+ return 0;
+ }
+
+ mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+ tmp = readl(addr) & mask;
+ tmp |= val << ((where & 0x3) * 8);
+ writel(tmp, addr);
+ return 0;
+}
+
+/**
+ * iproc_pcie_apb_err_disable() - configure apb error
+ *
+ * APB error forwarding can be disabled during access of configuration
+ * registers of the endpoint device, to prevent unsupported requests
+ * (typically seen during enumeration with multi-function devices) from
+ * triggering a system exception.
+ *
+ * @bus: pcie udevice
+ * @bdf: BDF (bus, device, function) value
+ * @disable: flag to disable/enable APB error forwarding
+ */
+static inline void iproc_pcie_apb_err_disable(const struct udevice *bus,
+ pci_dev_t bdf, bool disable)
+{
+ struct iproc_pcie *pcie = dev_get_priv(bus);
+ u32 val;
+
+ if (PCI_BUS(bdf) && pcie->has_apb_err_disable) {
+ val = iproc_pcie_read_reg(pcie, IPROC_PCIE_APB_ERR_EN);
+ if (disable)
+ val &= ~APB_ERR_EN;
+ else
+ val |= APB_ERR_EN;
+ iproc_pcie_write_reg(pcie, IPROC_PCIE_APB_ERR_EN, val);
+ }
+}
+
+static int iproc_pcie_config_read32(const struct udevice *bus, pci_dev_t bdf,
+ uint offset, ulong *valuep,
+ enum pci_size_t size)
+{
+ struct iproc_pcie *pcie = dev_get_priv(bus);
+ int ret;
+ ulong data;
+
+ iproc_pcie_apb_err_disable(bus, bdf, true);
+ ret = pci_generic_mmap_read_config(bus, iproc_pcie_map_ep_cfg_reg,
+ bdf, offset, &data, PCI_SIZE_32);
+ iproc_pcie_apb_err_disable(bus, bdf, false);
+ if (size <= PCI_SIZE_16)
+ *valuep = (data >> (8 * (offset & 3))) &
+ ((1 << (BIT(size) * 8)) - 1);
+ else
+ *valuep = data;
+
+ if (!ret && PCI_BUS(bdf) == 0)
+ iproc_pcie_fix_cap(pcie, offset, valuep);
+
+ return ret;
+}
+
+static int iproc_pcie_config_write32(struct udevice *bus, pci_dev_t bdf,
+ uint offset, ulong value,
+ enum pci_size_t size)
+{
+ void *addr;
+ ulong mask, tmp;
+ int ret;
+
+ ret = iproc_pcie_map_ep_cfg_reg(bus, bdf, offset, &addr);
+ if (ret)
+ return ret;
+
+ if (size == PCI_SIZE_32) {
+ writel(value, addr);
+ return ret;
+ }
+
+ iproc_pcie_apb_err_disable(bus, bdf, true);
+ mask = ~(((1 << (BIT(size) * 8)) - 1) << ((offset & 0x3) * 8));
+ tmp = readl(addr) & mask;
+ tmp |= (value << ((offset & 0x3) * 8));
+ writel(tmp, addr);
+ iproc_pcie_apb_err_disable(bus, bdf, false);
+
+ return ret;
+}
+
+static const struct dm_pci_ops iproc_pcie_ops = {
+ .read_config = iproc_pcie_config_read32,
+ .write_config = iproc_pcie_config_write32,
+};
+
+static int iproc_pcie_rev_init(struct iproc_pcie *pcie)
+{
+ unsigned int reg_idx;
+ const u16 *regs;
+ u16 num_elements;
+
+ switch (pcie->type) {
+ case IPROC_PCIE_PAXC_V2:
+ pcie->ep_is_internal = true;
+ regs = iproc_pcie_reg_paxc_v2;
+ num_elements = ARRAY_SIZE(iproc_pcie_reg_paxc_v2);
+ break;
+ case IPROC_PCIE_PAXB_V2:
+ regs = iproc_pcie_reg_paxb_v2;
+ num_elements = ARRAY_SIZE(iproc_pcie_reg_paxb_v2);
+ pcie->has_apb_err_disable = true;
+ if (pcie->need_ob_cfg) {
+ pcie->ob.axi_offset = 0;
+ pcie->ob_map = paxb_v2_ob_map;
+ pcie->ob.nr_windows = ARRAY_SIZE(paxb_v2_ob_map);
+ }
+ pcie->need_ib_cfg = true;
+ pcie->ib.nr_regions = ARRAY_SIZE(paxb_v2_ib_map);
+ pcie->ib_map = paxb_v2_ib_map;
+ break;
+ default:
+ dev_dbg(pcie->dev, "incompatible iProc PCIe interface\n");
+ return -EINVAL;
+ }
+
+ pcie->reg_offsets = calloc(IPROC_PCIE_MAX_NUM_REG,
+ sizeof(*pcie->reg_offsets));
+ if (!pcie->reg_offsets)
+ return -ENOMEM;
+
+ /* go through the register table and populate all valid registers */
+ pcie->reg_offsets[0] = (pcie->type == IPROC_PCIE_PAXC_V2) ?
+ IPROC_PCIE_REG_INVALID : regs[0];
+ for (reg_idx = 1; reg_idx < num_elements; reg_idx++)
+ pcie->reg_offsets[reg_idx] = regs[reg_idx] ?
+ regs[reg_idx] : IPROC_PCIE_REG_INVALID;
+
+ return 0;
+}
+
+static inline bool iproc_pcie_ob_is_valid(struct iproc_pcie *pcie,
+ int window_idx)
+{
+ u32 val;
+
+ val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_OARR0, window_idx));
+
+ return !!(val & OARR_VALID);
+}
+
+static inline int iproc_pcie_ob_write(struct iproc_pcie *pcie, int window_idx,
+ int size_idx, u64 axi_addr, u64 pci_addr)
+{
+ u16 oarr_offset, omap_offset;
+
+ /*
+ * Derive the OARR/OMAP offset from the first pair (OARR0/OMAP0) based
+ * on window index.
+ */
+ oarr_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OARR0,
+ window_idx));
+ omap_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_OMAP0,
+ window_idx));
+ if (iproc_pcie_reg_is_invalid(oarr_offset) ||
+ iproc_pcie_reg_is_invalid(omap_offset))
+ return -EINVAL;
+
+ /*
+ * Program the OARR registers. The upper 32-bit OARR register is
+ * always right after the lower 32-bit OARR register.
+ */
+ writel(lower_32_bits(axi_addr) | (size_idx << OARR_SIZE_CFG_SHIFT) |
+ OARR_VALID, pcie->base + oarr_offset);
+ writel(upper_32_bits(axi_addr), pcie->base + oarr_offset + 4);
+
+ /* now program the OMAP registers */
+ writel(lower_32_bits(pci_addr), pcie->base + omap_offset);
+ writel(upper_32_bits(pci_addr), pcie->base + omap_offset + 4);
+
+ debug("ob window [%d]: offset 0x%x axi %pap pci %pap\n",
+ window_idx, oarr_offset, &axi_addr, &pci_addr);
+ debug("oarr lo 0x%x oarr hi 0x%x\n",
+ readl(pcie->base + oarr_offset),
+ readl(pcie->base + oarr_offset + 4));
+ debug("omap lo 0x%x omap hi 0x%x\n",
+ readl(pcie->base + omap_offset),
+ readl(pcie->base + omap_offset + 4));
+
+ return 0;
+}
+
+/**
+ * iproc_pcie_setup_ob() - setup outbound address mapping
+ *
+ * Some iProc SoCs require software to configure the outbound address
+ * mapping.
+ *
+ * Outbound address translation:
+ *
+ * iproc_pcie_address = axi_address - axi_offset
+ * OARR = iproc_pcie_address
+ * OMAP = pci_addr
+ * axi_addr -> iproc_pcie_address -> OARR -> OMAP -> pci_address
+ *
+ * @pcie: pcie device
+ * @axi_addr: axi address to be translated
+ * @pci_addr: pci address
+ * @size: window size
+ *
+ * @return: 0 on success and -ve on failure
+ */
+static int iproc_pcie_setup_ob(struct iproc_pcie *pcie, u64 axi_addr,
+ u64 pci_addr, resource_size_t size)
+{
+ struct iproc_pcie_ob *ob = &pcie->ob;
+ int ret = -EINVAL, window_idx, size_idx;
+
+ if (axi_addr < ob->axi_offset) {
+ pr_err("axi address %pap less than offset %pap\n",
+ &axi_addr, &ob->axi_offset);
+ return -EINVAL;
+ }
+
+ /*
+ * Translate the AXI address to the internal address used by the iProc
+ * PCIe core before programming the OARR
+ */
+ axi_addr -= ob->axi_offset;
+
+ /* iterate through all OARR/OMAP mapping windows */
+ for (window_idx = ob->nr_windows - 1; window_idx >= 0; window_idx--) {
+ const struct iproc_pcie_ob_map *ob_map =
+ &pcie->ob_map[window_idx];
+
+ /*
+ * If current outbound window is already in use, move on to the
+ * next one.
+ */
+ if (iproc_pcie_ob_is_valid(pcie, window_idx))
+ continue;
+
+ /*
+ * Iterate through all supported window sizes within the
+ * OARR/OMAP pair to find a match. Go through the window sizes
+ * in a descending order.
+ */
+ for (size_idx = ob_map->nr_sizes - 1; size_idx >= 0;
+ size_idx--) {
+ resource_size_t window_size =
+ ob_map->window_sizes[size_idx] * SZ_1M;
+
+ /*
+			 * Keep iterating until we reach the last window
+			 * with the minimal window size at index zero. In
+			 * that case, as a compromise, map the region using
+			 * the minimum window size that can be supported.
+ */
+ if (size < window_size) {
+ if (size_idx > 0 || window_idx > 0)
+ continue;
+
+ /*
+ * For the corner case of reaching the minimal
+ * window size that can be supported on the
+ * last window
+ */
+ axi_addr = ALIGN_DOWN(axi_addr, window_size);
+ pci_addr = ALIGN_DOWN(pci_addr, window_size);
+ size = window_size;
+ }
+
+ if (!IS_ALIGNED(axi_addr, window_size) ||
+ !IS_ALIGNED(pci_addr, window_size)) {
+ pr_err("axi %pap or pci %pap not aligned\n",
+ &axi_addr, &pci_addr);
+ return -EINVAL;
+ }
+
+ /*
+ * Match found! Program both OARR and OMAP and mark
+ * them as a valid entry.
+ */
+ ret = iproc_pcie_ob_write(pcie, window_idx, size_idx,
+ axi_addr, pci_addr);
+ if (ret)
+ goto err_ob;
+
+ size -= window_size;
+ if (size == 0)
+ return 0;
+
+ /*
+ * If we are here, we are done with the current window,
+ * but not yet finished all mappings. Need to move on
+ * to the next window.
+ */
+ axi_addr += window_size;
+ pci_addr += window_size;
+ break;
+ }
+ }
+
+err_ob:
+ pr_err("unable to configure outbound mapping\n");
+ pr_err("axi %pap, axi offset %pap, pci %pap, res size %pap\n",
+ &axi_addr, &ob->axi_offset, &pci_addr, &size);
+
+ return ret;
+}
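+
+/*
+ * Worked example of the translation above (illustration only, with
+ * hypothetical numbers): assume axi_offset = 0 and a 128 MiB MEM region
+ * at AXI 0x40000000 mapped 1:1 onto the PCI bus. Then
+ *
+ *   iproc_pcie_address = 0x40000000 - 0 = 0x40000000  (written to OARR)
+ *   OMAP               = 0x40000000                   (PCI address)
+ *
+ * and a CPU access to AXI 0x40000000 is emitted on PCIe at 0x40000000,
+ * provided one of the ob_map window sizes covers the 128 MiB region.
+ */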
+
+static int iproc_pcie_map_ranges(struct udevice *dev)
+{
+ struct iproc_pcie *pcie = dev_get_priv(dev);
+ struct udevice *bus = pci_get_controller(dev);
+ struct pci_controller *hose = dev_get_uclass_priv(bus);
+ int i, ret;
+
+ for (i = 0; i < hose->region_count; i++) {
+ if (hose->regions[i].flags == PCI_REGION_MEM ||
+ hose->regions[i].flags == PCI_REGION_PREFETCH) {
+ debug("%d: bus_addr %p, axi_addr %p, size 0x%lx\n",
+ i, &hose->regions[i].bus_start,
+ &hose->regions[i].phys_start,
+ hose->regions[i].size);
+ ret = iproc_pcie_setup_ob(pcie,
+ hose->regions[i].phys_start,
+ hose->regions[i].bus_start,
+ hose->regions[i].size);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static inline bool iproc_pcie_ib_is_in_use(struct iproc_pcie *pcie,
+ int region_idx)
+{
+ const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx];
+ u32 val;
+
+ val = iproc_pcie_read_reg(pcie, MAP_REG(IPROC_PCIE_IARR0, region_idx));
+
+ return !!(val & (BIT(ib_map->nr_sizes) - 1));
+}
+
+static inline bool
+iproc_pcie_ib_check_type(const struct iproc_pcie_ib_map *ib_map,
+ enum iproc_pcie_ib_map_type type)
+{
+ return !!(ib_map->type == type);
+}
+
+static int iproc_pcie_ib_write(struct iproc_pcie *pcie, int region_idx,
+ int size_idx, int nr_windows, u64 axi_addr,
+ u64 pci_addr, resource_size_t size)
+{
+ const struct iproc_pcie_ib_map *ib_map = &pcie->ib_map[region_idx];
+ u16 iarr_offset, imap_offset;
+ u32 val;
+ int window_idx;
+
+ iarr_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_IARR0,
+ region_idx));
+ imap_offset = iproc_pcie_reg_offset(pcie, MAP_REG(IPROC_PCIE_IMAP0,
+ region_idx));
+ if (iproc_pcie_reg_is_invalid(iarr_offset) ||
+ iproc_pcie_reg_is_invalid(imap_offset))
+ return -EINVAL;
+
+ debug("ib region [%d]: offset 0x%x axi %pap pci %pap\n",
+ region_idx, iarr_offset, &axi_addr, &pci_addr);
+
+ /*
+ * Program the IARR registers. The upper 32-bit IARR register is
+ * always right after the lower 32-bit IARR register.
+ */
+ writel(lower_32_bits(pci_addr) | BIT(size_idx),
+ pcie->base + iarr_offset);
+ writel(upper_32_bits(pci_addr), pcie->base + iarr_offset + 4);
+
+ debug("iarr lo 0x%x iarr hi 0x%x\n",
+ readl(pcie->base + iarr_offset),
+ readl(pcie->base + iarr_offset + 4));
+
+ /*
+ * Now program the IMAP registers. Each IARR region may have one or
+ * more IMAP windows.
+ */
+ size >>= ilog2(nr_windows);
+ for (window_idx = 0; window_idx < nr_windows; window_idx++) {
+ val = readl(pcie->base + imap_offset);
+ val |= lower_32_bits(axi_addr) | IMAP_VALID;
+ writel(val, pcie->base + imap_offset);
+ writel(upper_32_bits(axi_addr),
+ pcie->base + imap_offset + ib_map->imap_addr_offset);
+
+ debug("imap window [%d] lo 0x%x hi 0x%x\n",
+ window_idx, readl(pcie->base + imap_offset),
+ readl(pcie->base + imap_offset +
+ ib_map->imap_addr_offset));
+
+ imap_offset += ib_map->imap_window_offset;
+ axi_addr += size;
+ }
+
+ return 0;
+}
+
+/**
+ * iproc_pcie_setup_ib() - setup inbound address mapping
+ *
+ * @pcie: pcie device
+ * @axi_addr: axi address to be translated
+ * @pci_addr: pci address
+ * @size: window size
+ * @type: inbound mapping type
+ *
+ * @return: 0 on success and -ve on failure
+ */
+static int iproc_pcie_setup_ib(struct iproc_pcie *pcie, u64 axi_addr,
+ u64 pci_addr, resource_size_t size,
+ enum iproc_pcie_ib_map_type type)
+{
+ struct iproc_pcie_ib *ib = &pcie->ib;
+ int ret;
+ unsigned int region_idx, size_idx;
+
+ /* iterate through all IARR mapping regions */
+ for (region_idx = 0; region_idx < ib->nr_regions; region_idx++) {
+ const struct iproc_pcie_ib_map *ib_map =
+ &pcie->ib_map[region_idx];
+
+ /*
+ * If current inbound region is already in use or not a
+ * compatible type, move on to the next.
+ */
+ if (iproc_pcie_ib_is_in_use(pcie, region_idx) ||
+ !iproc_pcie_ib_check_type(ib_map, type))
+ continue;
+
+ /* iterate through all supported region sizes to find a match */
+ for (size_idx = 0; size_idx < ib_map->nr_sizes; size_idx++) {
+ resource_size_t region_size =
+ ib_map->region_sizes[size_idx] * ib_map->size_unit;
+
+ if (size != region_size)
+ continue;
+
+ if (!IS_ALIGNED(axi_addr, region_size) ||
+ !IS_ALIGNED(pci_addr, region_size)) {
+ pr_err("axi %pap or pci %pap not aligned\n",
+ &axi_addr, &pci_addr);
+ return -EINVAL;
+ }
+
+ /* Match found! Program IARR and all IMAP windows. */
+ ret = iproc_pcie_ib_write(pcie, region_idx, size_idx,
+ ib_map->nr_windows, axi_addr,
+ pci_addr, size);
+ if (ret)
+ goto err_ib;
+ else
+ return 0;
+ }
+ }
+ ret = -EINVAL;
+
+err_ib:
+ pr_err("unable to configure inbound mapping\n");
+ pr_err("axi %pap, pci %pap, res size %pap\n",
+ &axi_addr, &pci_addr, &size);
+
+ return ret;
+}
+
+static int iproc_pcie_map_dma_ranges(struct iproc_pcie *pcie)
+{
+ int ret;
+ struct pci_region regions;
+ int i = 0;
+
+ while (!pci_get_dma_regions(pcie->dev, &regions, i)) {
+ dev_dbg(pcie->dev,
+ "dma %d: bus_addr %#lx, axi_addr %#llx, size %#lx\n",
+ i, regions.bus_start, regions.phys_start, regions.size);
+
+ /* Each range entry corresponds to an inbound mapping region */
+ ret = iproc_pcie_setup_ib(pcie, regions.phys_start,
+ regions.bus_start,
+ regions.size,
+ IPROC_PCIE_IB_MAP_MEM);
+ if (ret)
+ return ret;
+ i++;
+ }
+ return 0;
+}
+
+static void iproc_pcie_reset_map_regs(struct iproc_pcie *pcie)
+{
+ struct iproc_pcie_ib *ib = &pcie->ib;
+ struct iproc_pcie_ob *ob = &pcie->ob;
+ int window_idx, region_idx;
+
+ if (pcie->ep_is_internal)
+ return;
+
+ /* iterate through all OARR mapping regions */
+ for (window_idx = ob->nr_windows - 1; window_idx >= 0; window_idx--) {
+ iproc_pcie_write_reg(pcie, MAP_REG(IPROC_PCIE_OARR0,
+ window_idx), 0);
+ }
+
+ /* iterate through all IARR mapping regions */
+ for (region_idx = 0; region_idx < ib->nr_regions; region_idx++) {
+ iproc_pcie_write_reg(pcie, MAP_REG(IPROC_PCIE_IARR0,
+ region_idx), 0);
+ }
+}
+
+static void iproc_pcie_reset(struct iproc_pcie *pcie)
+{
+ u32 val;
+
+ /*
+ * PAXC and the internal emulated endpoint device downstream should not
+ * be reset. If firmware has been loaded on the endpoint device at an
+ * earlier boot stage, reset here causes issues.
+ */
+ if (pcie->ep_is_internal)
+ return;
+
+ /*
+ * Select perst_b signal as reset source. Put the device into reset,
+ * and then bring it out of reset
+ */
+ val = iproc_pcie_read_reg(pcie, IPROC_PCIE_CLK_CTRL);
+ val &= ~EP_PERST_SOURCE_SELECT & ~EP_MODE_SURVIVE_PERST &
+ ~RC_PCIE_RST_OUTPUT;
+ iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+ udelay(250);
+
+ val |= RC_PCIE_RST_OUTPUT;
+ iproc_pcie_write_reg(pcie, IPROC_PCIE_CLK_CTRL, val);
+ mdelay(100);
+}
+
+static inline bool iproc_pcie_link_is_active(struct iproc_pcie *pcie)
+{
+ u32 val;
+
+ val = iproc_pcie_read_reg(pcie, IPROC_PCIE_LINK_STATUS);
+ return !!((val & PCIE_PHYLINKUP) && (val & PCIE_DL_ACTIVE));
+}
+
+static int iproc_pcie_check_link(struct iproc_pcie *pcie)
+{
+ u32 link_status, class;
+
+ pcie->link_is_active = false;
+ /* force class to PCI_CLASS_BRIDGE_PCI (0x0604) */
+#define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c
+#define PCI_CLASS_BRIDGE_MASK 0xffff00
+#define PCI_CLASS_BRIDGE_SHIFT 8
+ iproc_pci_raw_config_read32(pcie, 0,
+ PCI_BRIDGE_CTRL_REG_OFFSET,
+ 4, &class);
+ class &= ~PCI_CLASS_BRIDGE_MASK;
+ class |= (PCI_CLASS_BRIDGE_PCI << PCI_CLASS_BRIDGE_SHIFT);
+ iproc_pci_raw_config_write32(pcie, 0,
+ PCI_BRIDGE_CTRL_REG_OFFSET,
+ 4, class);
+
+ /*
+ * PAXC connects to emulated endpoint devices directly and does not
+ * have a Serdes. Therefore skip the link detection logic here.
+ */
+ if (pcie->ep_is_internal) {
+ pcie->link_is_active = true;
+ return 0;
+ }
+
+ if (!iproc_pcie_link_is_active(pcie)) {
+ pr_err("PHY or data link is INACTIVE!\n");
+ return -ENODEV;
+ }
+
+#define PCI_TARGET_LINK_SPEED_MASK 0xf
+#define PCI_TARGET_LINK_WIDTH_MASK 0x3f
+#define PCI_TARGET_LINK_WIDTH_OFFSET 0x4
+
+ /* check link status to see if link is active */
+ iproc_pci_raw_config_read32(pcie, 0,
+ IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
+ 2, &link_status);
+ if (link_status & PCI_EXP_LNKSTA_NLW)
+ pcie->link_is_active = true;
+
+ if (pcie->link_is_active)
+ pr_info("link UP @ Speed Gen-%d and width-x%d\n",
+ link_status & PCI_TARGET_LINK_SPEED_MASK,
+ (link_status >> PCI_TARGET_LINK_WIDTH_OFFSET) &
+ PCI_TARGET_LINK_WIDTH_MASK);
+ else
+ pr_info("link DOWN\n");
+
+ return 0;
+}
+
+static int iproc_pcie_probe(struct udevice *dev)
+{
+ struct iproc_pcie *pcie = dev_get_priv(dev);
+ int ret;
+
+ pcie->type = (enum iproc_pcie_type)dev_get_driver_data(dev);
+ debug("PAX type %d\n", pcie->type);
+ pcie->base = dev_read_addr_ptr(dev);
+ debug("PAX reg base %p\n", pcie->base);
+
+ if (!pcie->base)
+ return -ENODEV;
+
+ if (dev_read_bool(dev, "brcm,pcie-ob"))
+ pcie->need_ob_cfg = true;
+
+ pcie->dev = dev;
+ ret = iproc_pcie_rev_init(pcie);
+ if (ret)
+ return ret;
+
+ if (!pcie->ep_is_internal) {
+ ret = generic_phy_get_by_name(dev, "pcie-phy", &pcie->phy);
+ if (!ret) {
+ ret = generic_phy_init(&pcie->phy);
+ if (ret) {
+ pr_err("failed to init %s PHY\n", dev->name);
+ return ret;
+ }
+
+ ret = generic_phy_power_on(&pcie->phy);
+ if (ret) {
+ pr_err("power on %s PHY failed\n", dev->name);
+ goto err_exit_phy;
+ }
+ }
+ }
+
+ iproc_pcie_reset(pcie);
+
+ if (pcie->need_ob_cfg) {
+ ret = iproc_pcie_map_ranges(dev);
+ if (ret) {
+ pr_err("outbound map failed\n");
+ goto err_power_off_phy;
+ }
+ }
+
+ if (pcie->need_ib_cfg) {
+ ret = iproc_pcie_map_dma_ranges(pcie);
+ if (ret) {
+ pr_err("inbound map failed\n");
+ goto err_power_off_phy;
+ }
+ }
+
+ if (iproc_pcie_check_link(pcie))
+ pr_info("no PCIe EP device detected\n");
+
+ return 0;
+
+err_power_off_phy:
+ generic_phy_power_off(&pcie->phy);
+err_exit_phy:
+ generic_phy_exit(&pcie->phy);
+ return ret;
+}
+
+static int iproc_pcie_remove(struct udevice *dev)
+{
+ struct iproc_pcie *pcie = dev_get_priv(dev);
+ int ret;
+
+ iproc_pcie_reset_map_regs(pcie);
+
+ if (generic_phy_valid(&pcie->phy)) {
+ ret = generic_phy_power_off(&pcie->phy);
+ if (ret) {
+ pr_err("failed to power off PCIe phy\n");
+ return ret;
+ }
+
+ ret = generic_phy_exit(&pcie->phy);
+ if (ret) {
+			pr_err("failed to exit PCIe phy\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct udevice_id pci_iproc_ids[] = {
+ { .compatible = "brcm,iproc-pcie-paxb-v2",
+ .data = IPROC_PCIE_PAXB_V2 },
+ { .compatible = "brcm,iproc-pcie-paxc-v2",
+ .data = IPROC_PCIE_PAXC_V2 },
+ { }
+};
+
+U_BOOT_DRIVER(pci_iproc) = {
+ .name = "pci_iproc",
+ .id = UCLASS_PCI,
+ .of_match = pci_iproc_ids,
+ .ops = &iproc_pcie_ops,
+ .probe = iproc_pcie_probe,
+ .remove = iproc_pcie_remove,
+ .priv_auto_alloc_size = sizeof(struct iproc_pcie),
+ .flags = DM_REMOVE_OS_PREPARE,
+};
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index e146ffc5f86..e344677f91f 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -822,6 +822,13 @@ config MPC8XX_CONS
depends on MPC8xx
default y
+config XEN_SERIAL
+ bool "XEN serial support"
+ depends on XEN
+ help
+	  Enable the Xen para-virtualized console (PV console) serial
+	  driver. If built without DM support, Xen itself must be built
+	  with CONFIG_VERBOSE_DEBUG.
+
choice
prompt "Console port"
default 8xx_CONS_SMC1
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index e4a92bbbb71..25f7f8d342c 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_OWL_SERIAL) += serial_owl.o
obj-$(CONFIG_OMAP_SERIAL) += serial_omap.o
obj-$(CONFIG_MTK_SERIAL) += serial_mtk.o
obj-$(CONFIG_SIFIVE_SERIAL) += serial_sifive.o
+obj-$(CONFIG_XEN_SERIAL) += serial_xen.o
ifndef CONFIG_SPL_BUILD
obj-$(CONFIG_USB_TTY) += usbtty.o
diff --git a/drivers/serial/serial_xen.c b/drivers/serial/serial_xen.c
new file mode 100644
index 00000000000..ba6504b9479
--- /dev/null
+++ b/drivers/serial/serial_xen.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2018 NXP
+ * (C) 2020 EPAM Systems Inc.
+ */
+#include <common.h>
+#include <cpu_func.h>
+#include <dm.h>
+#include <serial.h>
+#include <watchdog.h>
+
+#include <linux/bug.h>
+
+#include <xen/hvm.h>
+#include <xen/events.h>
+
+#include <xen/interface/sched.h>
+#include <xen/interface/hvm/hvm_op.h>
+#include <xen/interface/hvm/params.h>
+#include <xen/interface/io/console.h>
+#include <xen/interface/io/ring.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+u32 console_evtchn;
+
+/*
+ * struct xen_uart_priv - Structure representing Xen UART information
+ * @intf: Console I/O interface for Xen guest OSes
+ * @evtchn: Console event channel
+ */
+struct xen_uart_priv {
+ struct xencons_interface *intf;
+ u32 evtchn;
+};
+
+int xen_serial_setbrg(struct udevice *dev, int baudrate)
+{
+ return 0;
+}
+
+static int xen_serial_probe(struct udevice *dev)
+{
+ struct xen_uart_priv *priv = dev_get_priv(dev);
+ u64 val = 0;
+ unsigned long gfn;
+ int ret;
+
+ ret = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &val);
+ if (ret < 0 || val == 0)
+ return ret;
+
+ priv->evtchn = val;
+ console_evtchn = val;
+
+ ret = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &val);
+ if (ret < 0)
+ return ret;
+
+ if (!val)
+ return -EINVAL;
+
+ gfn = val;
+ priv->intf = (struct xencons_interface *)(gfn << XEN_PAGE_SHIFT);
+
+ return 0;
+}
+
+static int xen_serial_pending(struct udevice *dev, bool input)
+{
+ struct xen_uart_priv *priv = dev_get_priv(dev);
+ struct xencons_interface *intf = priv->intf;
+
+ if (!input || intf->in_cons == intf->in_prod)
+ return 0;
+
+ return 1;
+}
+
+static int xen_serial_getc(struct udevice *dev)
+{
+ struct xen_uart_priv *priv = dev_get_priv(dev);
+ struct xencons_interface *intf = priv->intf;
+ XENCONS_RING_IDX cons;
+ char c;
+
+ while (intf->in_cons == intf->in_prod)
+ mb(); /* wait */
+
+ cons = intf->in_cons;
+ mb(); /* get pointers before reading ring */
+
+ c = intf->in[MASK_XENCONS_IDX(cons++, intf->in)];
+
+ mb(); /* read ring before consuming */
+ intf->in_cons = cons;
+
+ notify_remote_via_evtchn(priv->evtchn);
+
+ return c;
+}
+
+static int __write_console(struct udevice *dev, const char *data, int len)
+{
+ struct xen_uart_priv *priv = dev_get_priv(dev);
+ struct xencons_interface *intf = priv->intf;
+ XENCONS_RING_IDX cons, prod;
+ int sent = 0;
+
+ cons = intf->out_cons;
+ prod = intf->out_prod;
+	mb(); /* Read the pointers before touching the ring */
+
+ WARN_ON((prod - cons) > sizeof(intf->out));
+
+ while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
+ intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
+
+ mb(); /* Update data before pointer */
+ intf->out_prod = prod;
+
+ if (sent)
+ notify_remote_via_evtchn(priv->evtchn);
+
+ return sent;
+}
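+
+/*
+ * Note on the ring arithmetic above (illustration): the producer and
+ * consumer indices are free-running and only wrapped at access time, so
+ * with the 2048-byte output ring from xen/interface/io/console.h the
+ * free space is sizeof(intf->out) - (prod - cons) and, for example,
+ * MASK_XENCONS_IDX(2049, intf->out) == 1.
+ */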
+
+static int write_console(struct udevice *dev, const char *data, int len)
+{
+ /*
+ * Make sure the whole buffer is emitted, polling if
+ * necessary. We don't ever want to rely on the hvc daemon
+ * because the most interesting console output is when the
+ * kernel is crippled.
+ */
+ while (len) {
+ int sent = __write_console(dev, data, len);
+
+ data += sent;
+ len -= sent;
+
+ if (unlikely(len))
+ HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+ }
+
+ return 0;
+}
+
+static int xen_serial_putc(struct udevice *dev, const char ch)
+{
+ write_console(dev, &ch, 1);
+
+ return 0;
+}
+
+static const struct dm_serial_ops xen_serial_ops = {
+ .putc = xen_serial_putc,
+ .getc = xen_serial_getc,
+ .pending = xen_serial_pending,
+};
+
+#if CONFIG_IS_ENABLED(OF_CONTROL)
+static const struct udevice_id xen_serial_ids[] = {
+ { .compatible = "xen,xen" },
+ { }
+};
+#endif
+
+U_BOOT_DRIVER(serial_xen) = {
+ .name = "serial_xen",
+ .id = UCLASS_SERIAL,
+#if CONFIG_IS_ENABLED(OF_CONTROL)
+ .of_match = xen_serial_ids,
+#endif
+ .priv_auto_alloc_size = sizeof(struct xen_uart_priv),
+ .probe = xen_serial_probe,
+ .ops = &xen_serial_ops,
+#if !CONFIG_IS_ENABLED(OF_CONTROL)
+ .flags = DM_FLAG_PRE_RELOC,
+#endif
+};
+
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index a939918e973..a8e8bfc04b4 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -33,7 +33,6 @@
#define USB_NET_NAME "usb_ether"
-#define atomic_read
extern struct platform_data brd;
diff --git a/drivers/usb/musb-new/linux-compat.h b/drivers/usb/musb-new/linux-compat.h
index 733b197f593..6d9f19dfe6b 100644
--- a/drivers/usb/musb-new/linux-compat.h
+++ b/drivers/usb/musb-new/linux-compat.h
@@ -10,10 +10,6 @@
#define platform_data device_data
-#ifndef wmb
-#define wmb() asm volatile ("" : : : "memory")
-#endif
-
#define msleep(a) udelay(a * 1000)
/*
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
new file mode 100644
index 00000000000..6ad2a936682
--- /dev/null
+++ b/drivers/xen/Kconfig
@@ -0,0 +1,10 @@
+config PVBLOCK
+ bool "Xen para-virtualized block device"
+ depends on DM
+ select BLK
+ select HAVE_BLOCK_DEVICE
+ help
+ This driver implements the front-end of the Xen virtual
+ block device driver. It communicates with a back-end driver
+ in another domain which drives the actual block device.
+
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644
index 00000000000..87157df69b8
--- /dev/null
+++ b/drivers/xen/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# (C) Copyright 2020 EPAM Systems Inc.
+
+obj-y += hypervisor.o
+obj-y += events.o
+obj-y += xenbus.o
+obj-y += gnttab.o
+
+obj-$(CONFIG_PVBLOCK) += pvblock.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
new file mode 100644
index 00000000000..c490f87b2fc
--- /dev/null
+++ b/drivers/xen/events.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ * (C) 2020 - EPAM Systems Inc.
+ *
+ * File: events.c [1]
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ *
+ * Date: Jul 2003, changes Jun 2005
+ *
+ * Description: Deals with events received on event channels
+ *
+ * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary
+ */
+#include <common.h>
+#include <log.h>
+
+#include <asm/io.h>
+#include <asm/xen/system.h>
+
+#include <xen/events.h>
+#include <xen/hvm.h>
+
+extern u32 console_evtchn;
+
+#define NR_EVS 1024
+
+/**
+ * struct _ev_action - represents an event handler.
+ *
+ * Chaining or sharing is not allowed
+ */
+struct _ev_action {
+ void (*handler)(evtchn_port_t port, struct pt_regs *regs, void *data);
+ void *data;
+ u32 count;
+};
+
+static struct _ev_action ev_actions[NR_EVS];
+void default_handler(evtchn_port_t port, struct pt_regs *regs, void *data);
+
+static unsigned long bound_ports[NR_EVS / (8 * sizeof(unsigned long))];
+
+void unbind_all_ports(void)
+{
+ int i;
+ int cpu = 0;
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = &s->vcpu_info[cpu];
+
+ for (i = 0; i < NR_EVS; i++) {
+ if (i == console_evtchn)
+ continue;
+ if (test_and_clear_bit(i, bound_ports)) {
+ printf("port %d still bound!\n", i);
+ unbind_evtchn(i);
+ }
+ }
+ vcpu_info->evtchn_upcall_pending = 0;
+ vcpu_info->evtchn_pending_sel = 0;
+}
+
+int do_event(evtchn_port_t port, struct pt_regs *regs)
+{
+ struct _ev_action *action;
+
+ clear_evtchn(port);
+
+ if (port >= NR_EVS) {
+ printk("WARN: do_event(): Port number too large: %d\n", port);
+ return 1;
+ }
+
+ action = &ev_actions[port];
+ action->count++;
+
+ /* call the handler */
+ action->handler(port, regs, action->data);
+
+ return 1;
+}
+
+evtchn_port_t bind_evtchn(evtchn_port_t port,
+ void (*handler)(evtchn_port_t, struct pt_regs *, void *),
+ void *data)
+{
+ if (ev_actions[port].handler != default_handler)
+ printf("WARN: Handler for port %d already registered, replacing\n",
+ port);
+
+ ev_actions[port].data = data;
+ wmb();
+ ev_actions[port].handler = handler;
+ synch_set_bit(port, bound_ports);
+
+ return port;
+}
+
+/**
+ * unbind_evtchn() - Unbind event channel for selected port
+ */
+void unbind_evtchn(evtchn_port_t port)
+{
+ struct evtchn_close close;
+ int rc;
+
+ if (ev_actions[port].handler == default_handler)
+ debug("Default handler for port %d when unbinding\n", port);
+ mask_evtchn(port);
+ clear_evtchn(port);
+
+ ev_actions[port].handler = default_handler;
+ wmb();
+ ev_actions[port].data = NULL;
+ synch_clear_bit(port, bound_ports);
+
+ close.port = port;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ if (rc)
+ printf("WARN: close_port %d failed rc=%d. ignored\n", port, rc);
+}
+
+void default_handler(evtchn_port_t port, struct pt_regs *regs, void *ignore)
+{
+ debug("[Port %d] - event received\n", port);
+}
+
+/**
+ * evtchn_alloc_unbound() - Create a port available to the remote domain
+ * (@pal) for exchanging notifications.
+ *
+ * Unfortunate confusion of terminology: the port is unbound as far
+ * as Xen is concerned, but we automatically bind a handler to it.
+ *
+ * Return: The result of the hypervisor call.
+ */
+int evtchn_alloc_unbound(domid_t pal,
+ void (*handler)(evtchn_port_t, struct pt_regs *, void *),
+ void *data, evtchn_port_t *port)
+{
+ int rc;
+
+ struct evtchn_alloc_unbound op;
+
+ op.dom = DOMID_SELF;
+ op.remote_dom = pal;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+ if (rc) {
+		printf("ERROR: alloc_unbound failed with rc=%d\n", rc);
+ return rc;
+ }
+ if (!handler)
+ handler = default_handler;
+ *port = bind_evtchn(op.port, handler, data);
+ return rc;
+}
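+
+/*
+ * Minimal usage sketch (illustration only, mirroring what the pvblock
+ * front-end in this series does; 'backend_domid' and 'priv' stand in for
+ * the caller's own data):
+ *
+ *   evtchn_port_t port;
+ *
+ *   evtchn_alloc_unbound(backend_domid, NULL, priv, &port);
+ *   unmask_evtchn(port);
+ *   // advertise 'port' to the backend via XenStore, then
+ *   notify_remote_via_evtchn(port);
+ */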
+
+/**
+ * eventchn_poll() - Event channel polling function
+ *
+ * Check and process any pending events
+ */
+void eventchn_poll(void)
+{
+ do_hypervisor_callback(NULL);
+}
+
+/**
+ * init_events() - Initialize event handler
+ *
+ * Initially all events are without a handler and disabled.
+ */
+void init_events(void)
+{
+ int i;
+
+ debug("%s\n", __func__);
+
+ for (i = 0; i < NR_EVS; i++) {
+ ev_actions[i].handler = default_handler;
+ mask_evtchn(i);
+ }
+}
+
+/**
+ * fini_events() - Close all ports
+ *
+ * Mask and clear event channels. Close port using EVTCHNOP_close
+ * hypercall.
+ */
+void fini_events(void)
+{
+ debug("%s\n", __func__);
+ /* Dealloc all events */
+ unbind_all_ports();
+}
+
diff --git a/drivers/xen/gnttab.c b/drivers/xen/gnttab.c
new file mode 100644
index 00000000000..becf7a79fbf
--- /dev/null
+++ b/drivers/xen/gnttab.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2006 - Cambridge University
+ * (C) 2020 - EPAM Systems Inc.
+ *
+ * File: gnttab.c [1]
+ * Author: Steven Smith (sos22@cam.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ *
+ * Date: July 2006
+ *
+ * Description: Simple grant tables implementation. About as stupid as it's
+ * possible to be and still work.
+ *
+ * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary
+ */
+#include <common.h>
+#include <linux/compiler.h>
+#include <log.h>
+#include <malloc.h>
+
+#include <asm/armv8/mmu.h>
+#include <asm/io.h>
+#include <asm/xen/system.h>
+
+#include <linux/bug.h>
+
+#include <xen/gnttab.h>
+#include <xen/hvm.h>
+
+#include <xen/interface/memory.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define NR_RESERVED_ENTRIES 8
+
+/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
+#define NR_GRANT_FRAMES 1
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(struct grant_entry_v1))
+
+static struct grant_entry_v1 *gnttab_table;
+static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
+
+static void put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ gnttab_list[ref] = gnttab_list[0];
+ gnttab_list[0] = ref;
+ local_irq_restore(flags);
+}
+
+static grant_ref_t get_free_entry(void)
+{
+ unsigned int ref;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ ref = gnttab_list[0];
+ BUG_ON(ref < NR_RESERVED_ENTRIES || ref >= NR_GRANT_ENTRIES);
+ gnttab_list[0] = gnttab_list[ref];
+ local_irq_restore(flags);
+ return ref;
+}
+
+/**
+ * gnttab_grant_access() - Allow access to the given frame.
+ * The function creates an entry in the grant table according
+ * to the specified parameters.
+ * @domid: the id of the domain for which access is allowed
+ * @frame: the number of the shared frame
+ * @readonly: determines whether the frame is shared read-only or read-write
+ *
+ * Return: relevant grant reference
+ */
+grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, int readonly)
+{
+ grant_ref_t ref;
+
+ ref = get_free_entry();
+ gnttab_table[ref].frame = frame;
+ gnttab_table[ref].domid = domid;
+ wmb();
+ readonly *= GTF_readonly;
+ gnttab_table[ref].flags = GTF_permit_access | readonly;
+
+ return ref;
+}
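+
+/*
+ * Minimal usage sketch (illustration only; 'backend_domid' stands in for
+ * the remote domain id):
+ *
+ *   void *page = memalign(PAGE_SIZE, PAGE_SIZE);
+ *   grant_ref_t ref = gnttab_grant_access(backend_domid,
+ *                                         virt_to_mfn(page), 0);
+ *   // pass 'ref' to the backend (e.g. via XenStore), do the I/O, then
+ *   gnttab_end_access(ref);
+ */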
+
+/**
+ * gnttab_end_access() - End of memory sharing. The function invalidates
+ * the entry in the grant table.
+ */
+int gnttab_end_access(grant_ref_t ref)
+{
+ u16 flags, nflags;
+
+ BUG_ON(ref >= NR_GRANT_ENTRIES || ref < NR_RESERVED_ENTRIES);
+
+ nflags = gnttab_table[ref].flags;
+ do {
+ if ((flags = nflags) & (GTF_reading | GTF_writing)) {
+ printf("WARNING: g.e. still in use! (%x)\n", flags);
+ return 0;
+ }
+ } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) !=
+ flags);
+
+ put_free_entry(ref);
+ return 1;
+}
+
+grant_ref_t gnttab_alloc_and_grant(void **map)
+{
+ unsigned long mfn;
+ grant_ref_t gref;
+
+ *map = (void *)memalign(PAGE_SIZE, PAGE_SIZE);
+ mfn = virt_to_mfn(*map);
+ gref = gnttab_grant_access(0, mfn, 0);
+ return gref;
+}
+
+static const char * const gnttabop_error_msgs[] = GNTTABOP_error_msgs;
+
+const char *gnttabop_error(int16_t status)
+{
+ status = -status;
+ if (status < 0 || status >= ARRAY_SIZE(gnttabop_error_msgs))
+ return "bad status";
+ else
+ return gnttabop_error_msgs[status];
+}
+
+/* Get Xen's suggested physical page assignments for the grant table. */
+void get_gnttab_base(phys_addr_t *gnttab_base, phys_size_t *gnttab_sz)
+{
+ const void *blob = gd->fdt_blob;
+ struct fdt_resource res;
+ int mem;
+
+ mem = fdt_node_offset_by_compatible(blob, -1, "xen,xen");
+ if (mem < 0) {
+ printf("No xen,xen compatible found\n");
+ BUG();
+ }
+
+ mem = fdt_get_resource(blob, mem, "reg", 0, &res);
+ if (mem == -FDT_ERR_NOTFOUND) {
+ printf("No grant table base in the device tree\n");
+ BUG();
+ }
+
+ *gnttab_base = (phys_addr_t)res.start;
+ if (gnttab_sz)
+ *gnttab_sz = (phys_size_t)(res.end - res.start + 1);
+
+ debug("FDT suggests grant table base at %llx\n",
+ *gnttab_base);
+}
+
+void init_gnttab(void)
+{
+ struct xen_add_to_physmap xatp;
+ struct gnttab_setup_table setup;
+ xen_pfn_t frames[NR_GRANT_FRAMES];
+ int i, rc;
+
+ debug("%s\n", __func__);
+
+ for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++)
+ put_free_entry(i);
+
+ get_gnttab_base((phys_addr_t *)&gnttab_table, NULL);
+
+ for (i = 0; i < NR_GRANT_FRAMES; i++) {
+ xatp.domid = DOMID_SELF;
+ xatp.size = 0;
+ xatp.space = XENMAPSPACE_grant_table;
+ xatp.idx = i;
+ xatp.gpfn = PFN_DOWN((unsigned long)gnttab_table) + i;
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+ if (rc)
+ printf("XENMEM_add_to_physmap failed; status = %d\n",
+ rc);
+ BUG_ON(rc != 0);
+ }
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = NR_GRANT_FRAMES;
+ set_xen_guest_handle(setup.frame_list, frames);
+}
+
+void fini_gnttab(void)
+{
+ struct xen_remove_from_physmap xrtp;
+ struct gnttab_setup_table setup;
+ int i, rc;
+
+ debug("%s\n", __func__);
+
+ for (i = 0; i < NR_GRANT_FRAMES; i++) {
+ xrtp.domid = DOMID_SELF;
+ xrtp.gpfn = PFN_DOWN((unsigned long)gnttab_table) + i;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrtp);
+ if (rc)
+ printf("XENMEM_remove_from_physmap failed; status = %d\n",
+ rc);
+ BUG_ON(rc != 0);
+ }
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = 0;
+}
+
diff --git a/drivers/xen/hypervisor.c b/drivers/xen/hypervisor.c
new file mode 100644
index 00000000000..178c206f5bf
--- /dev/null
+++ b/drivers/xen/hypervisor.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: MIT
+/*
+ * hypervisor.c
+ *
+ * Communication to/from hypervisor.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk, Intel Research Cambridge
+ * Copyright (c) 2020, EPAM Systems Inc.
+ */
+#include <common.h>
+#include <cpu_func.h>
+#include <log.h>
+#include <memalign.h>
+
+#include <asm/io.h>
+#include <asm/armv8/mmu.h>
+#include <asm/xen/system.h>
+
+#include <linux/bug.h>
+
+#include <xen/hvm.h>
+#include <xen/events.h>
+#include <xen/gnttab.h>
+#include <xen/xenbus.h>
+#include <xen/interface/memory.h>
+
+#define active_evtchns(cpu, sh, idx) \
+ ((sh)->evtchn_pending[idx] & \
+ ~(sh)->evtchn_mask[idx])
+
+int in_callback;
+
+/*
+ * Shared page for communicating with the hypervisor.
+ * Events flags go here, for example.
+ */
+struct shared_info *HYPERVISOR_shared_info;
+
+static const char *param_name(int op)
+{
+#define PARAM(x)[HVM_PARAM_##x] = #x
+ static const char *const names[] = {
+ PARAM(CALLBACK_IRQ),
+ PARAM(STORE_PFN),
+ PARAM(STORE_EVTCHN),
+ PARAM(PAE_ENABLED),
+ PARAM(IOREQ_PFN),
+ PARAM(VPT_ALIGN),
+ PARAM(CONSOLE_PFN),
+ PARAM(CONSOLE_EVTCHN),
+ };
+#undef PARAM
+
+ if (op >= ARRAY_SIZE(names))
+ return "unknown";
+
+ if (!names[op])
+ return "reserved";
+
+ return names[op];
+}
+
+/**
+ * hvm_get_parameter_maintain_dcache() - Obtain an HVM parameter value,
+ * maintaining the data cache manually.
+ * @idx: HVM parameter index
+ * @value: Value to fill in
+ *
+ * According to Xen on ARM ABI (xen/include/public/arch-arm.h):
+ * all memory which is shared with other entities in the system
+ * (including the hypervisor and other guests) must reside in memory
+ * which is mapped as Normal Inner Write-Back Outer Write-Back
+ * Inner-Shareable.
+ *
+ * Thus, page attributes must be equally set for all the entities
+ * working with that page.
+ *
+ * Before MMU setup the data cache is turned off, so it means that
+ * manual data cache maintenance is required, because of the
+ * difference of page attributes.
+ */
+int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value)
+{
+ struct xen_hvm_param xhv;
+ int ret;
+
+ invalidate_dcache_range((unsigned long)&xhv,
+ (unsigned long)&xhv + sizeof(xhv));
+ xhv.domid = DOMID_SELF;
+ xhv.index = idx;
+ invalidate_dcache_range((unsigned long)&xhv,
+ (unsigned long)&xhv + sizeof(xhv));
+
+ ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+ if (ret < 0) {
+ pr_err("Cannot get hvm parameter %s (%d): %d!\n",
+ param_name(idx), idx, ret);
+ BUG();
+ }
+ invalidate_dcache_range((unsigned long)&xhv,
+ (unsigned long)&xhv + sizeof(xhv));
+
+ *value = xhv.value;
+
+ return ret;
+}
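+
+/*
+ * Minimal usage sketch (illustration only): front-ends in this series
+ * read their parameters like this, using the dcache-maintaining variant
+ * whenever the call may happen before the MMU and caches are set up:
+ *
+ *   uint64_t val;
+ *
+ *   if (!hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &val))
+ *       console_evtchn = val;
+ */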
+
+int hvm_get_parameter(int idx, uint64_t *value)
+{
+ struct xen_hvm_param xhv;
+ int ret;
+
+ xhv.domid = DOMID_SELF;
+ xhv.index = idx;
+ ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+ if (ret < 0) {
+ pr_err("Cannot get hvm parameter %s (%d): %d!\n",
+ param_name(idx), idx, ret);
+ BUG();
+ }
+
+ *value = xhv.value;
+
+ return ret;
+}
+
+struct shared_info *map_shared_info(void *p)
+{
+ struct xen_add_to_physmap xatp;
+
+ HYPERVISOR_shared_info = (struct shared_info *)memalign(PAGE_SIZE,
+ PAGE_SIZE);
+ if (!HYPERVISOR_shared_info)
+ BUG();
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
+ BUG();
+
+ return HYPERVISOR_shared_info;
+}
+
+void do_hypervisor_callback(struct pt_regs *regs)
+{
+ unsigned long l1, l2, l1i, l2i;
+ unsigned int port;
+ int cpu = 0;
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = &s->vcpu_info[cpu];
+
+ in_callback = 1;
+
+ vcpu_info->evtchn_upcall_pending = 0;
+ l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+ while (l1 != 0) {
+ l1i = __ffs(l1);
+ l1 &= ~(1UL << l1i);
+
+ while ((l2 = active_evtchns(cpu, s, l1i)) != 0) {
+ l2i = __ffs(l2);
+ l2 &= ~(1UL << l2i);
+
+ port = (l1i * (sizeof(unsigned long) * 8)) + l2i;
+ do_event(port, regs);
+ }
+ }
+
+ in_callback = 0;
+}
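+
+/*
+ * Worked example of the port calculation above (illustration only): on
+ * arm64, sizeof(unsigned long) * 8 == 64, so with bit 1 set in
+ * evtchn_pending_sel (l1i == 1) and bit 3 set in the corresponding
+ * evtchn_pending word (l2i == 3), the selected port is 1 * 64 + 3 == 67.
+ */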
+
+void force_evtchn_callback(void)
+{
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+ int save;
+#endif
+ struct vcpu_info *vcpu;
+
+ vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+ save = vcpu->evtchn_upcall_mask;
+#endif
+
+ while (vcpu->evtchn_upcall_pending) {
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+ vcpu->evtchn_upcall_mask = 1;
+#endif
+ do_hypervisor_callback(NULL);
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+ vcpu->evtchn_upcall_mask = save;
+#endif
+	}
+}
+
+void mask_evtchn(uint32_t port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+
+ synch_set_bit(port, &s->evtchn_mask[0]);
+}
+
+void unmask_evtchn(uint32_t port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = &s->vcpu_info[smp_processor_id()];
+
+ synch_clear_bit(port, &s->evtchn_mask[0]);
+
+ /*
+ * Just like a real IO-APIC we 'lose the interrupt edge' if the
+ * channel is masked.
+ */
+ if (synch_test_bit(port, &s->evtchn_pending[0]) &&
+ !synch_test_and_set_bit(port / (sizeof(unsigned long) * 8),
+ &vcpu_info->evtchn_pending_sel)) {
+ vcpu_info->evtchn_upcall_pending = 1;
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+ if (!vcpu_info->evtchn_upcall_mask)
+#endif
+ force_evtchn_callback();
+ }
+}
+
+void clear_evtchn(uint32_t port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+
+ synch_clear_bit(port, &s->evtchn_pending[0]);
+}
+
+void xen_init(void)
+{
+ debug("%s\n", __func__);
+
+ map_shared_info(NULL);
+ init_events();
+ init_xenbus();
+ init_gnttab();
+}
+
+void xen_fini(void)
+{
+ debug("%s\n", __func__);
+
+ fini_gnttab();
+ fini_xenbus();
+ fini_events();
+}
diff --git a/drivers/xen/pvblock.c b/drivers/xen/pvblock.c
new file mode 100644
index 00000000000..76e82fbf41d
--- /dev/null
+++ b/drivers/xen/pvblock.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * (C) 2007-2008 Samuel Thibault.
+ * (C) Copyright 2020 EPAM Systems Inc.
+ */
+#include <blk.h>
+#include <common.h>
+#include <dm.h>
+#include <dm/device-internal.h>
+#include <malloc.h>
+#include <part.h>
+
+#include <asm/armv8/mmu.h>
+#include <asm/io.h>
+#include <asm/xen/system.h>
+
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <xen/events.h>
+#include <xen/gnttab.h>
+#include <xen/hvm.h>
+#include <xen/xenbus.h>
+
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
+
+#define DRV_NAME "pvblock"
+#define DRV_NAME_BLK "pvblock_blk"
+
+#define O_RDONLY 00
+#define O_RDWR 02
+#define WAIT_RING_TO_MS 10
+
+struct blkfront_info {
+ u64 sectors;
+ unsigned int sector_size;
+ int mode;
+ int info;
+ int barrier;
+ int flush;
+};
+
+/**
+ * struct blkfront_dev - Struct representing blkfront device
+ * @dom: Domain id
+ * @ring: Front_ring structure
+ * @ring_ref: The grant reference, allowing us to grant access
+ * to the ring to the other end/domain
+ * @evtchn: Event channel used to signal ring events
+ * @handle: Virtual block device (blkif) handle
+ * @nodename: Device XenStore path in format "device/vbd/" + @devid
+ * @backend: Backend XenStore path
+ * @info: Private data
+ * @devid: Device id
+ */
+struct blkfront_dev {
+ domid_t dom;
+
+ struct blkif_front_ring ring;
+ grant_ref_t ring_ref;
+ evtchn_port_t evtchn;
+ blkif_vdev_t handle;
+
+ char *nodename;
+ char *backend;
+ struct blkfront_info info;
+ unsigned int devid;
+ u8 *bounce_buffer;
+};
+
+struct blkfront_platdata {
+ unsigned int devid;
+};
+
+/**
+ * struct blkfront_aiocb - AIO control block
+ * @aio_dev: Blkfront device
+ * @aio_buf: Memory buffer, which must be aligned to the @aio_dev
+ *	sector size
+ * @aio_nbytes: Size of the AIO in bytes, which must be a multiple of
+ *	the @aio_dev sector size
+ * @aio_offset: Offset in bytes, which must be aligned to the @aio_dev
+ *	sector size
+ * @data: Data used when receiving the response from the ring
+ * @gref: Array of grant references
+ * @n: Number of segments
+ * @aio_cb: Completion callback, invoked when the request finishes
+ */
+struct blkfront_aiocb {
+ struct blkfront_dev *aio_dev;
+ u8 *aio_buf;
+ size_t aio_nbytes;
+ off_t aio_offset;
+ void *data;
+
+ grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int n;
+
+ void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
+};
+
+static void blkfront_sync(struct blkfront_dev *dev);
+
+static void free_blkfront(struct blkfront_dev *dev)
+{
+ mask_evtchn(dev->evtchn);
+ free(dev->backend);
+
+ gnttab_end_access(dev->ring_ref);
+ free(dev->ring.sring);
+
+ unbind_evtchn(dev->evtchn);
+
+ free(dev->bounce_buffer);
+ free(dev->nodename);
+ free(dev);
+}
+
+static int init_blkfront(unsigned int devid, struct blkfront_dev *dev)
+{
+ xenbus_transaction_t xbt;
+ char *err = NULL;
+ char *message = NULL;
+ struct blkif_sring *s;
+ int retry = 0;
+ char *msg = NULL;
+ char *c;
+ char nodename[32];
+ char path[ARRAY_SIZE(nodename) + strlen("/backend-id") + 1];
+
+ sprintf(nodename, "device/vbd/%d", devid);
+
+ memset(dev, 0, sizeof(*dev));
+ dev->nodename = strdup(nodename);
+ dev->devid = devid;
+
+ snprintf(path, sizeof(path), "%s/backend-id", nodename);
+ dev->dom = xenbus_read_integer(path);
+ evtchn_alloc_unbound(dev->dom, NULL, dev, &dev->evtchn);
+
+ s = (struct blkif_sring *)memalign(PAGE_SIZE, PAGE_SIZE);
+ if (!s) {
+ printf("Failed to allocate shared ring\n");
+ goto error;
+ }
+
+ SHARED_RING_INIT(s);
+ FRONT_RING_INIT(&dev->ring, s, PAGE_SIZE);
+
+ dev->ring_ref = gnttab_grant_access(dev->dom, virt_to_pfn(s), 0);
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ printf("starting transaction\n");
+ free(err);
+ }
+
+ err = xenbus_printf(xbt, nodename, "ring-ref", "%u", dev->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, nodename, "event-channel", "%u", dev->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, nodename, "protocol", "%s",
+ XEN_IO_PROTO_ABI_NATIVE);
+ if (err) {
+ message = "writing protocol";
+ goto abort_transaction;
+ }
+
+ snprintf(path, sizeof(path), "%s/state", nodename);
+ err = xenbus_switch_state(xbt, path, XenbusStateConnected);
+ if (err) {
+ message = "switching state";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0, &retry);
+ free(err);
+	if (retry)
+		goto again;
+
+ goto done;
+
+abort_transaction:
+ free(err);
+ err = xenbus_transaction_end(xbt, 1, &retry);
+ printf("Abort transaction %s\n", message);
+ goto error;
+
+done:
+ snprintf(path, sizeof(path), "%s/backend", nodename);
+ msg = xenbus_read(XBT_NIL, path, &dev->backend);
+ if (msg) {
+ printf("Error %s when reading the backend path %s\n",
+ msg, path);
+ goto error;
+ }
+
+ dev->handle = strtoul(strrchr(nodename, '/') + 1, NULL, 0);
+
+ {
+ XenbusState state;
+ char path[strlen(dev->backend) +
+ strlen("/feature-flush-cache") + 1];
+
+ snprintf(path, sizeof(path), "%s/mode", dev->backend);
+ msg = xenbus_read(XBT_NIL, path, &c);
+ if (msg) {
+ printf("Error %s when reading the mode\n", msg);
+ goto error;
+ }
+ if (*c == 'w')
+ dev->info.mode = O_RDWR;
+ else
+ dev->info.mode = O_RDONLY;
+ free(c);
+
+ snprintf(path, sizeof(path), "%s/state", dev->backend);
+
+ msg = NULL;
+ state = xenbus_read_integer(path);
+ while (!msg && state < XenbusStateConnected)
+ msg = xenbus_wait_for_state_change(path, &state);
+ if (msg || state != XenbusStateConnected) {
+ printf("backend not available, state=%d\n", state);
+ goto error;
+ }
+
+ snprintf(path, sizeof(path), "%s/info", dev->backend);
+ dev->info.info = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/sectors", dev->backend);
+ /*
+ * FIXME: read_integer returns an int, so disk size
+ * limited to 1TB for now
+ */
+ dev->info.sectors = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
+ dev->info.sector_size = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/feature-barrier",
+ dev->backend);
+ dev->info.barrier = xenbus_read_integer(path);
+
+ snprintf(path, sizeof(path), "%s/feature-flush-cache",
+ dev->backend);
+ dev->info.flush = xenbus_read_integer(path);
+ }
+ unmask_evtchn(dev->evtchn);
+
+ dev->bounce_buffer = memalign(dev->info.sector_size,
+ dev->info.sector_size);
+ if (!dev->bounce_buffer) {
+ printf("Failed to allocate bouncing buffer\n");
+ goto error;
+ }
+
+ debug("%llu sectors of %u bytes, bounce buffer at %p\n",
+ dev->info.sectors, dev->info.sector_size,
+ dev->bounce_buffer);
+
+ return 0;
+
+error:
+ free(msg);
+ free(err);
+ free_blkfront(dev);
+ return -ENODEV;
+}
+
+static void shutdown_blkfront(struct blkfront_dev *dev)
+{
+ char *err = NULL, *err2;
+ XenbusState state;
+
+ char path[strlen(dev->backend) + strlen("/state") + 1];
+ char nodename[strlen(dev->nodename) + strlen("/event-channel") + 1];
+
+ debug("Close " DRV_NAME ", device ID %d\n", dev->devid);
+
+ blkfront_sync(dev);
+
+ snprintf(path, sizeof(path), "%s/state", dev->backend);
+ snprintf(nodename, sizeof(nodename), "%s/state", dev->nodename);
+
+ if ((err = xenbus_switch_state(XBT_NIL, nodename,
+ XenbusStateClosing)) != NULL) {
+ printf("%s: error changing state to %d: %s\n", __func__,
+ XenbusStateClosing, err);
+ goto close;
+ }
+
+ state = xenbus_read_integer(path);
+ while (!err && state < XenbusStateClosing)
+ err = xenbus_wait_for_state_change(path, &state);
+ free(err);
+
+ if ((err = xenbus_switch_state(XBT_NIL, nodename,
+ XenbusStateClosed)) != NULL) {
+ printf("%s: error changing state to %d: %s\n", __func__,
+ XenbusStateClosed, err);
+ goto close;
+ }
+
+ state = xenbus_read_integer(path);
+ while (state < XenbusStateClosed) {
+ err = xenbus_wait_for_state_change(path, &state);
+ free(err);
+ }
+
+ if ((err = xenbus_switch_state(XBT_NIL, nodename,
+ XenbusStateInitialising)) != NULL) {
+ printf("%s: error changing state to %d: %s\n", __func__,
+ XenbusStateInitialising, err);
+ goto close;
+ }
+
+ state = xenbus_read_integer(path);
+ while (!err &&
+ (state < XenbusStateInitWait || state >= XenbusStateClosed))
+ err = xenbus_wait_for_state_change(path, &state);
+
+close:
+ free(err);
+
+ snprintf(nodename, sizeof(nodename), "%s/ring-ref", dev->nodename);
+ err2 = xenbus_rm(XBT_NIL, nodename);
+ free(err2);
+ snprintf(nodename, sizeof(nodename), "%s/event-channel", dev->nodename);
+ err2 = xenbus_rm(XBT_NIL, nodename);
+ free(err2);
+
+ if (!err)
+ free_blkfront(dev);
+}
+
+/**
+ * blkfront_aio_poll() - AIO polling function.
+ * @dev: Blkfront device
+ *
+ * Here we receive responses from the ring and check their status. This happens
+ * until all pending responses have been consumed: entries between the consumer
+ * pointer and the producer pointer are processed, and the consumer pointer is
+ * advanced to mark them as read.
+ *
+ * Return: Number of consumed responses.
+ */
+static int blkfront_aio_poll(struct blkfront_dev *dev)
+{
+ RING_IDX rp, cons;
+ struct blkif_response *rsp;
+ int more;
+ int nr_consumed;
+
+moretodo:
+ rp = dev->ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+ cons = dev->ring.rsp_cons;
+
+ nr_consumed = 0;
+ while ((cons != rp)) {
+ struct blkfront_aiocb *aiocbp;
+ int status;
+
+ rsp = RING_GET_RESPONSE(&dev->ring, cons);
+ nr_consumed++;
+
+ aiocbp = (void *)(uintptr_t)rsp->id;
+ status = rsp->status;
+
+ switch (rsp->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ {
+ int j;
+
+ if (status != BLKIF_RSP_OKAY)
+ printf("%s error %d on %s at offset %llu, num bytes %llu\n",
+ rsp->operation == BLKIF_OP_READ ?
+ "read" : "write",
+ status, aiocbp->aio_dev->nodename,
+ (unsigned long long)aiocbp->aio_offset,
+ (unsigned long long)aiocbp->aio_nbytes);
+
+ for (j = 0; j < aiocbp->n; j++)
+ gnttab_end_access(aiocbp->gref[j]);
+
+ break;
+ }
+
+ case BLKIF_OP_WRITE_BARRIER:
+ if (status != BLKIF_RSP_OKAY)
+ printf("write barrier error %d\n", status);
+ break;
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ if (status != BLKIF_RSP_OKAY)
+ printf("flush error %d\n", status);
+ break;
+
+ default:
+ printf("unrecognized block operation %d response (status %d)\n",
+ rsp->operation, status);
+ break;
+ }
+
+ dev->ring.rsp_cons = ++cons;
+		/* Note: the callback frees aiocbp itself */
+ if (aiocbp && aiocbp->aio_cb)
+ aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+ if (dev->ring.rsp_cons != cons)
+ /* We reentered, we must not continue here */
+ break;
+ }
+
+ RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
+ if (more)
+ goto moretodo;
+
+ return nr_consumed;
+}
+
+static void blkfront_wait_slot(struct blkfront_dev *dev)
+{
+ /* Wait for a slot */
+ if (RING_FULL(&dev->ring)) {
+ while (true) {
+ blkfront_aio_poll(dev);
+ if (!RING_FULL(&dev->ring))
+ break;
+ wait_event_timeout(NULL, !RING_FULL(&dev->ring),
+ WAIT_RING_TO_MS);
+ }
+ }
+}
+
+/**
+ * blkfront_aio() - Issue an AIO request.
+ * @aiocbp: AIO control block structure
+ * @write: Selects the operation:
+ *	0 - read
+ *	1 - write
+ *
+ * We check whether the AIO parameters meet the requirements of the device.
+ * Then we take a free request slot from the ring and fill in its arguments.
+ * After this we grant the backend access to the data pages and, as the last
+ * step, notify the backend via the event channel.
+ */
+static void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
+{
+ struct blkfront_dev *dev = aiocbp->aio_dev;
+ struct blkif_request *req;
+ RING_IDX i;
+ int notify;
+ int n, j;
+ uintptr_t start, end;
+
+ /* Can't io at non-sector-aligned location */
+ BUG_ON(aiocbp->aio_offset & (dev->info.sector_size - 1));
+ /* Can't io non-sector-sized amounts */
+ BUG_ON(aiocbp->aio_nbytes & (dev->info.sector_size - 1));
+ /* Can't io non-sector-aligned buffer */
+ BUG_ON(((uintptr_t)aiocbp->aio_buf & (dev->info.sector_size - 1)));
+
+ start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
+ end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes +
+ PAGE_SIZE - 1) & PAGE_MASK;
+ n = (end - start) / PAGE_SIZE;
+ aiocbp->n = n;
+
+ BUG_ON(n > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ blkfront_wait_slot(dev);
+ i = dev->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&dev->ring, i);
+
+ req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+ req->nr_segments = n;
+ req->handle = dev->handle;
+ req->id = (uintptr_t)aiocbp;
+ req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
+
+ for (j = 0; j < n; j++) {
+ req->seg[j].first_sect = 0;
+ req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
+ }
+ req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) /
+ dev->info.sector_size;
+ req->seg[n - 1].last_sect = (((uintptr_t)aiocbp->aio_buf +
+ aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
+ for (j = 0; j < n; j++) {
+ uintptr_t data = start + j * PAGE_SIZE;
+
+ if (!write) {
+ /* Trigger CoW if needed */
+ *(char *)(data + (req->seg[j].first_sect *
+ dev->info.sector_size)) = 0;
+ barrier();
+ }
+ req->seg[j].gref = gnttab_grant_access(dev->dom,
+ virt_to_pfn((void *)data),
+ write);
+ aiocbp->gref[j] = req->seg[j].gref;
+ }
+
+ dev->ring.req_prod_pvt = i + 1;
+
+ wmb();
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+
+ if (notify)
+ notify_remote_via_evtchn(dev->evtchn);
+}
+
+static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
+{
+ aiocbp->data = (void *)1;
+ aiocbp->aio_cb = NULL;
+}
+
+static void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
+{
+ aiocbp->aio_cb = blkfront_aio_cb;
+ blkfront_aio(aiocbp, write);
+ aiocbp->data = NULL;
+
+ while (true) {
+ blkfront_aio_poll(aiocbp->aio_dev);
+ if (aiocbp->data)
+ break;
+ cpu_relax();
+ }
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, u8 op,
+ uint64_t id)
+{
+ struct blkif_request *req;
+ int notify, i;
+
+ blkfront_wait_slot(dev);
+ i = dev->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&dev->ring, i);
+ req->operation = op;
+ req->nr_segments = 0;
+ req->handle = dev->handle;
+ req->id = id;
+ req->sector_number = 0;
+ dev->ring.req_prod_pvt = i + 1;
+ wmb();
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+ if (notify)
+ notify_remote_via_evtchn(dev->evtchn);
+}
+
+static void blkfront_sync(struct blkfront_dev *dev)
+{
+ if (dev->info.mode == O_RDWR) {
+ if (dev->info.barrier == 1)
+ blkfront_push_operation(dev,
+ BLKIF_OP_WRITE_BARRIER, 0);
+
+ if (dev->info.flush == 1)
+ blkfront_push_operation(dev,
+ BLKIF_OP_FLUSH_DISKCACHE, 0);
+ }
+
+ while (true) {
+ blkfront_aio_poll(dev);
+ if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+ break;
+ cpu_relax();
+ }
+}
+
+/**
+ * pvblock_iop() - Issue a block I/O operation.
+ * @udev: Pvblock device
+ * @blknr: Block number to read from / write to
+ * @blkcnt: Number of blocks to read / write
+ * @buffer: Memory buffer holding the data to be read / written
+ * @write: Selects the operation:
+ *	0 - read
+ *	1 - write
+ *
+ * Depending on the operation, data is read from or written to the device
+ * starting at block @blknr, using the memory at @buffer.
+ */
+static ulong pvblock_iop(struct udevice *udev, lbaint_t blknr,
+ lbaint_t blkcnt, void *buffer, int write)
+{
+ struct blkfront_dev *blk_dev = dev_get_priv(udev);
+ struct blk_desc *desc = dev_get_uclass_platdata(udev);
+ struct blkfront_aiocb aiocb;
+ lbaint_t blocks_todo;
+ bool unaligned;
+
+ if (blkcnt == 0)
+ return 0;
+
+ if ((blknr + blkcnt) > desc->lba) {
+ printf(DRV_NAME ": block number 0x" LBAF " exceeds max(0x" LBAF ")\n",
+ blknr + blkcnt, desc->lba);
+ return 0;
+ }
+
+ unaligned = (uintptr_t)buffer & (blk_dev->info.sector_size - 1);
+
+ aiocb.aio_dev = blk_dev;
+ aiocb.aio_offset = blknr * desc->blksz;
+ aiocb.aio_cb = NULL;
+ aiocb.data = NULL;
+ blocks_todo = blkcnt;
+ do {
+ aiocb.aio_buf = unaligned ? blk_dev->bounce_buffer : buffer;
+
+ if (write && unaligned)
+ memcpy(blk_dev->bounce_buffer, buffer, desc->blksz);
+
+ aiocb.aio_nbytes = unaligned ? desc->blksz :
+ min((size_t)(BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE),
+ (size_t)(blocks_todo * desc->blksz));
+
+ blkfront_io(&aiocb, write);
+
+ if (!write && unaligned)
+ memcpy(buffer, blk_dev->bounce_buffer, desc->blksz);
+
+ aiocb.aio_offset += aiocb.aio_nbytes;
+ buffer += aiocb.aio_nbytes;
+ blocks_todo -= aiocb.aio_nbytes / desc->blksz;
+ } while (blocks_todo > 0);
+
+ return blkcnt;
+}
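+
+/*
+ * Illustration of the loop above for an unaligned caller buffer (numbers
+ * are hypothetical): with a 512-byte sector size and 'buffer' not
+ * 512-byte aligned, each iteration transfers exactly one block through
+ * bounce_buffer, so reading 8 blocks issues 8 single-sector requests.
+ * Aligned buffers go to the ring directly, in chunks of up to
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE bytes per request.
+ */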
+
+ulong pvblock_blk_read(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt,
+ void *buffer)
+{
+ return pvblock_iop(udev, blknr, blkcnt, buffer, 0);
+}
+
+ulong pvblock_blk_write(struct udevice *udev, lbaint_t blknr, lbaint_t blkcnt,
+ const void *buffer)
+{
+ return pvblock_iop(udev, blknr, blkcnt, (void *)buffer, 1);
+}
+
+static int pvblock_blk_bind(struct udevice *udev)
+{
+ struct blk_desc *desc = dev_get_uclass_platdata(udev);
+ int devnum;
+
+ desc->if_type = IF_TYPE_PVBLOCK;
+ /*
+ * Initialize the devnum to -ENODEV. This is to make sure that
+ * blk_next_free_devnum() works as expected, since the default
+ * value 0 is a valid devnum.
+ */
+ desc->devnum = -ENODEV;
+ devnum = blk_next_free_devnum(IF_TYPE_PVBLOCK);
+ if (devnum < 0)
+ return devnum;
+ desc->devnum = devnum;
+ desc->part_type = PART_TYPE_UNKNOWN;
+ desc->bdev = udev;
+
+ strncpy(desc->vendor, "Xen", sizeof(desc->vendor));
+ strncpy(desc->revision, "1", sizeof(desc->revision));
+ strncpy(desc->product, "Virtual disk", sizeof(desc->product));
+
+ return 0;
+}
+
+static int pvblock_blk_probe(struct udevice *udev)
+{
+ struct blkfront_dev *blk_dev = dev_get_priv(udev);
+ struct blkfront_platdata *platdata = dev_get_platdata(udev);
+ struct blk_desc *desc = dev_get_uclass_platdata(udev);
+ int ret, devid;
+
+ devid = platdata->devid;
+ free(platdata);
+
+ ret = init_blkfront(devid, blk_dev);
+ if (ret < 0)
+ return ret;
+
+ desc->blksz = blk_dev->info.sector_size;
+ desc->lba = blk_dev->info.sectors;
+ desc->log2blksz = LOG2(blk_dev->info.sector_size);
+
+ return 0;
+}
+
+static int pvblock_blk_remove(struct udevice *udev)
+{
+ struct blkfront_dev *blk_dev = dev_get_priv(udev);
+
+ shutdown_blkfront(blk_dev);
+ return 0;
+}
+
+static const struct blk_ops pvblock_blk_ops = {
+ .read = pvblock_blk_read,
+ .write = pvblock_blk_write,
+};
+
+U_BOOT_DRIVER(pvblock_blk) = {
+ .name = DRV_NAME_BLK,
+ .id = UCLASS_BLK,
+ .ops = &pvblock_blk_ops,
+ .bind = pvblock_blk_bind,
+ .probe = pvblock_blk_probe,
+ .remove = pvblock_blk_remove,
+ .priv_auto_alloc_size = sizeof(struct blkfront_dev),
+ .flags = DM_FLAG_OS_PREPARE,
+};
+
+/*******************************************************************************
+ * Para-virtual block device class
+ *******************************************************************************/
+
+typedef int (*enum_vbd_callback)(struct udevice *parent, unsigned int devid);
+
+static int on_new_vbd(struct udevice *parent, unsigned int devid)
+{
+ struct driver_info info;
+ struct udevice *udev;
+ struct blkfront_platdata *platdata;
+ int ret;
+
+ debug("New " DRV_NAME_BLK ", device ID %d\n", devid);
+
+ platdata = malloc(sizeof(struct blkfront_platdata));
+ if (!platdata) {
+ printf("Failed to allocate platform data\n");
+ return -ENOMEM;
+ }
+
+ platdata->devid = devid;
+
+ info.name = DRV_NAME_BLK;
+ info.platdata = platdata;
+
+ ret = device_bind_by_name(parent, false, &info, &udev);
+ if (ret < 0) {
+ printf("Failed to bind " DRV_NAME_BLK " to device with ID %d, ret: %d\n",
+ devid, ret);
+ free(platdata);
+ }
+ return ret;
+}
+
+static int xenbus_enumerate_vbd(struct udevice *udev, enum_vbd_callback clb)
+{
+ char **dirs, *msg;
+ int i, ret;
+
+ msg = xenbus_ls(XBT_NIL, "device/vbd", &dirs);
+ if (msg) {
+ printf("Failed to read device/vbd directory: %s\n", msg);
+ free(msg);
+ return -ENODEV;
+ }
+
+ for (i = 0; dirs[i]; i++) {
+ int devid;
+
+ sscanf(dirs[i], "%d", &devid);
+ ret = clb(udev, devid);
+ if (ret < 0)
+ goto fail;
+
+ free(dirs[i]);
+ }
+ ret = 0;
+
+fail:
+ for (; dirs[i]; i++)
+ free(dirs[i]);
+ free(dirs);
+ return ret;
+}
+
+static void print_pvblock_devices(void)
+{
+ struct udevice *udev;
+ bool first = true;
+ const char *class_name;
+
+ class_name = uclass_get_name(UCLASS_PVBLOCK);
+ for (blk_first_device(IF_TYPE_PVBLOCK, &udev); udev;
+ blk_next_device(&udev), first = false) {
+ struct blk_desc *desc = dev_get_uclass_platdata(udev);
+
+ if (!first)
+ puts(", ");
+ printf("%s: %d", class_name, desc->devnum);
+ }
+ printf("\n");
+}
+
+void pvblock_init(void)
+{
+ struct driver_info info;
+ struct udevice *udev;
+ struct uclass *uc;
+ int ret;
+
+ /*
+ * At this point the Xen drivers have already been initialized,
+ * so we can instantiate the class driver and enumerate
+ * virtual block devices.
+ */
+ info.name = DRV_NAME;
+ ret = device_bind_by_name(gd->dm_root, false, &info, &udev);
+ if (ret < 0)
+ printf("Failed to bind " DRV_NAME ", ret: %d\n", ret);
+
+ /* Bootstrap virtual block devices class driver */
+ ret = uclass_get(UCLASS_PVBLOCK, &uc);
+ if (ret)
+ return;
+ uclass_foreach_dev_probe(UCLASS_PVBLOCK, udev);
+
+ print_pvblock_devices();
+}
+
+static int pvblock_probe(struct udevice *udev)
+{
+ struct uclass *uc;
+ int ret;
+
+ if (xenbus_enumerate_vbd(udev, on_new_vbd) < 0)
+ return -ENODEV;
+
+ ret = uclass_get(UCLASS_BLK, &uc);
+ if (ret)
+ return ret;
+ uclass_foreach_dev_probe(UCLASS_BLK, udev) {
+ if (_ret)
+ return _ret;
+ }
+ return 0;
+}
+
+U_BOOT_DRIVER(pvblock_drv) = {
+ .name = DRV_NAME,
+ .id = UCLASS_PVBLOCK,
+ .probe = pvblock_probe,
+};
+
+UCLASS_DRIVER(pvblock) = {
+ .name = DRV_NAME,
+ .id = UCLASS_PVBLOCK,
+};
diff --git a/drivers/xen/xenbus.c b/drivers/xen/xenbus.c
new file mode 100644
index 00000000000..177d144723c
--- /dev/null
+++ b/drivers/xen/xenbus.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) 2006 - Cambridge University
+ * (C) 2020 - EPAM Systems Inc.
+ *
+ * File: xenbus.c [1]
+ * Author: Steven Smith (sos22@cam.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ * Changes: John D. Ramsdell
+ *
+ * Date: Jun 2006, changes Aug 2006
+ *
+ * Description: Minimal implementation of xenbus
+ *
+ * [1] - http://xenbits.xen.org/gitweb/?p=mini-os.git;a=summary
+ */
+
+#include <common.h>
+#include <log.h>
+
+#include <asm/armv8/mmu.h>
+#include <asm/io.h>
+#include <asm/xen/system.h>
+
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <xen/events.h>
+#include <xen/hvm.h>
+#include <xen/xenbus.h>
+
+#include <xen/interface/io/xs_wire.h>
+
+#define map_frame_virt(v) (v << PAGE_SHIFT)
+
+#define SCNd16 "d"
+
+/* Timeout for waiting for a reply, in ms */
+#define WAIT_XENBUS_TO_MS 5000
+/* Polling timeout, in ms */
+#define WAIT_XENBUS_POLL_TO_MS 1
+
+static struct xenstore_domain_interface *xenstore_buf;
+
+static char *errmsg(struct xsd_sockmsg *rep);
+
+u32 xenbus_evtchn;
+
+struct write_req {
+ const void *data;
+ unsigned int len;
+};
+
+static void memcpy_from_ring(const void *r, void *d, int off, int len)
+{
+ int c1, c2;
+ const char *ring = r;
+ char *dest = d;
+
+ c1 = min(len, XENSTORE_RING_SIZE - off);
+ c2 = len - c1;
+ memcpy(dest, ring + off, c1);
+ memcpy(dest + c1, ring, c2);
+}
+
+/**
+ * xenbus_get_reply() - Receive reply from xenbus
+ * @req_reply: reply message structure
+ *
+ * Wait for a reply message event from the ring and copy the received message
+ * into the provided xsd_sockmsg structure. Repeat until the full reply has
+ * been received.
+ *
+ * Return: false - timeout
+ * true - reply is received
+ */
+static bool xenbus_get_reply(struct xsd_sockmsg **req_reply)
+{
+ struct xsd_sockmsg msg;
+ unsigned int prod = xenstore_buf->rsp_prod;
+
+again:
+ if (!wait_event_timeout(NULL, prod != xenstore_buf->rsp_prod,
+ WAIT_XENBUS_TO_MS)) {
+ printk("%s: wait_event timeout\n", __func__);
+ return false;
+ }
+
+ prod = xenstore_buf->rsp_prod;
+ if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg))
+ goto again;
+
+ rmb();
+ memcpy_from_ring(xenstore_buf->rsp, &msg,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
+ sizeof(msg));
+
+ if (xenstore_buf->rsp_prod - xenstore_buf->rsp_cons < sizeof(msg) + msg.len)
+ goto again;
+
+ /* We neither support nor expect any Xen bus watches. */
+ BUG_ON(msg.type == XS_WATCH_EVENT);
+
+ *req_reply = malloc(sizeof(msg) + msg.len);
+ memcpy_from_ring(xenstore_buf->rsp, *req_reply,
+ MASK_XENSTORE_IDX(xenstore_buf->rsp_cons),
+ msg.len + sizeof(msg));
+ mb();
+ xenstore_buf->rsp_cons += msg.len + sizeof(msg);
+
+ wmb();
+ notify_remote_via_evtchn(xenbus_evtchn);
+ return true;
+}
+
+char *xenbus_switch_state(xenbus_transaction_t xbt, const char *path,
+ XenbusState state)
+{
+ char *current_state;
+ char *msg = NULL;
+ char *msg2 = NULL;
+ char value[2];
+ XenbusState rs;
+ int xbt_flag = 0;
+ int retry = 0;
+
+ do {
+ if (xbt == XBT_NIL) {
+ msg = xenbus_transaction_start(&xbt);
+ if (msg)
+ goto exit;
+ xbt_flag = 1;
+ }
+
+ msg = xenbus_read(xbt, path, &current_state);
+ if (msg)
+ goto exit;
+
+ rs = (XenbusState)(current_state[0] - '0');
+ free(current_state);
+ if (rs == state) {
+ msg = NULL;
+ goto exit;
+ }
+
+ snprintf(value, 2, "%d", state);
+ msg = xenbus_write(xbt, path, value);
+
+exit:
+ if (xbt_flag) {
+ msg2 = xenbus_transaction_end(xbt, 0, &retry);
+ xbt = XBT_NIL;
+ }
+ if (msg == NULL && msg2 != NULL)
+ msg = msg2;
+ else
+ free(msg2);
+ } while (retry);
+
+ return msg;
+}
+
+char *xenbus_wait_for_state_change(const char *path, XenbusState *state)
+{
+ for (;;) {
+ char *res, *msg;
+ XenbusState rs;
+
+ msg = xenbus_read(XBT_NIL, path, &res);
+ if (msg)
+ return msg;
+
+ rs = (XenbusState)(res[0] - '0');
+ free(res);
+
+ if (rs == *state) {
+ wait_event_timeout(NULL, false, WAIT_XENBUS_POLL_TO_MS);
+ } else {
+ *state = rs;
+ break;
+ }
+ }
+ return NULL;
+}
+
+/* Send data to xenbus. This can block. All of the requests are seen
+ * by xenbus as if sent atomically. The header is added
+ * automatically, using type %type, req_id %req_id, and trans_id
+ * %trans_id.
+ */
+static void xb_write(int type, int req_id, xenbus_transaction_t trans_id,
+ const struct write_req *req, int nr_reqs)
+{
+ XENSTORE_RING_IDX prod;
+ int r;
+ int len = 0;
+ const struct write_req *cur_req;
+ int req_off;
+ int total_off;
+ int this_chunk;
+ struct xsd_sockmsg m = {
+ .type = type,
+ .req_id = req_id,
+ .tx_id = trans_id
+ };
+ struct write_req header_req = {
+ &m,
+ sizeof(m)
+ };
+
+ for (r = 0; r < nr_reqs; r++)
+ len += req[r].len;
+ m.len = len;
+ len += sizeof(m);
+
+ cur_req = &header_req;
+
+ BUG_ON(len > XENSTORE_RING_SIZE);
+ prod = xenstore_buf->req_prod;
+ /* We are running synchronously, so it is a bug if we do not
+ * have enough room to send a message: please note that a message
+ * can occupy multiple slots in the ring buffer.
+ */
+ BUG_ON(prod + len - xenstore_buf->req_cons > XENSTORE_RING_SIZE);
+
+ total_off = 0;
+ req_off = 0;
+ while (total_off < len) {
+ this_chunk = min(cur_req->len - req_off,
+ XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod));
+ memcpy((char *)xenstore_buf->req + MASK_XENSTORE_IDX(prod),
+ (char *)cur_req->data + req_off, this_chunk);
+ prod += this_chunk;
+ req_off += this_chunk;
+ total_off += this_chunk;
+ if (req_off == cur_req->len) {
+ req_off = 0;
+ if (cur_req == &header_req)
+ cur_req = req;
+ else
+ cur_req++;
+ }
+ }
+
+ BUG_ON(req_off != 0);
+ BUG_ON(total_off != len);
+ BUG_ON(prod > xenstore_buf->req_cons + XENSTORE_RING_SIZE);
+
+ /* Remote must see entire message before updating indexes */
+ wmb();
+
+ xenstore_buf->req_prod += len;
+
+ /* Send evtchn to notify remote */
+ notify_remote_via_evtchn(xenbus_evtchn);
+}
+
+/* Send a message to xenbus, in the same fashion as xb_write, and
+ * block waiting for a reply. The reply is malloced and should be
+ * freed by the caller.
+ */
+struct xsd_sockmsg *xenbus_msg_reply(int type,
+ xenbus_transaction_t trans,
+ struct write_req *io,
+ int nr_reqs)
+{
+ struct xsd_sockmsg *rep;
+
+ /* We do not use the request id which is echoed in the daemon's response. */
+ xb_write(type, 0, trans, io, nr_reqs);
+ /* Now wait for the message to arrive. */
+ if (!xenbus_get_reply(&rep))
+ return NULL;
+ return rep;
+}
+
+static char *errmsg(struct xsd_sockmsg *rep)
+{
+ char *res;
+
+ if (!rep) {
+ char msg[] = "No reply";
+ size_t len = strlen(msg) + 1;
+
+ return memcpy(malloc(len), msg, len);
+ }
+ if (rep->type != XS_ERROR)
+ return NULL;
+ res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ return res;
+}
+
+/* List the contents of a directory. Returns a malloc()ed array of
+ * pointers to malloc()ed strings. The array is NULL terminated. May
+ * block.
+ */
+char *xenbus_ls(xenbus_transaction_t xbt, const char *pre, char ***contents)
+{
+ struct xsd_sockmsg *reply, *repmsg;
+ struct write_req req[] = { { pre, strlen(pre) + 1 } };
+ int nr_elems, x, i;
+ char **res, *msg;
+
+ repmsg = xenbus_msg_reply(XS_DIRECTORY, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(repmsg);
+ if (msg) {
+ *contents = NULL;
+ return msg;
+ }
+ reply = repmsg + 1;
+ for (x = nr_elems = 0; x < repmsg->len; x++)
+ nr_elems += (((char *)reply)[x] == 0);
+ res = malloc(sizeof(res[0]) * (nr_elems + 1));
+ for (x = i = 0; i < nr_elems; i++) {
+ int l = strlen((char *)reply + x);
+
+ res[i] = malloc(l + 1);
+ memcpy(res[i], (char *)reply + x, l + 1);
+ x += l + 1;
+ }
+ res[i] = NULL;
+ free(repmsg);
+ *contents = res;
+ return NULL;
+}
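+
+/*
+ * Illustrative usage sketch (the "device/vbd" path is only an example):
+ * the caller owns both the returned array and each of its entries.
+ *
+ *	char **dirs, *msg;
+ *	int i;
+ *
+ *	msg = xenbus_ls(XBT_NIL, "device/vbd", &dirs);
+ *	if (msg) {
+ *		free(msg);
+ *		return;
+ *	}
+ *	for (i = 0; dirs[i]; i++)
+ *		free(dirs[i]);
+ *	free(dirs);
+ */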
+
+char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ char *res, *msg;
+
+ rep = xenbus_msg_reply(XS_READ, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
+ res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ *value = res;
+ return NULL;
+}
+
+char *xenbus_write(xenbus_transaction_t xbt, const char *path,
+ const char *value)
+{
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, strlen(value)},
+ };
+ struct xsd_sockmsg *rep;
+ char *msg;
+
+ rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_rm(xenbus_transaction_t xbt, const char *path)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ char *msg;
+
+ rep = xenbus_msg_reply(XS_RM, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value)
+{
+ struct write_req req[] = { {path, strlen(path) + 1} };
+ struct xsd_sockmsg *rep;
+ char *res, *msg;
+
+ rep = xenbus_msg_reply(XS_GET_PERMS, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(rep);
+ if (msg) {
+ *value = NULL;
+ return msg;
+ }
+ res = malloc(rep->len + 1);
+ memcpy(res, rep + 1, rep->len);
+ res[rep->len] = 0;
+ free(rep);
+ *value = res;
+ return NULL;
+}
+
+#define PERM_MAX_SIZE 32
+char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path,
+ domid_t dom, char perm)
+{
+ char value[PERM_MAX_SIZE];
+ struct write_req req[] = {
+ {path, strlen(path) + 1},
+ {value, 0},
+ };
+ struct xsd_sockmsg *rep;
+ char *msg;
+
+ snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom);
+ req[1].len = strlen(value) + 1;
+ rep = xenbus_msg_reply(XS_SET_PERMS, xbt, req, ARRAY_SIZE(req));
+ msg = errmsg(rep);
+ if (msg)
+ return msg;
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_transaction_start(xenbus_transaction_t *xbt)
+{
+ /* Xenstored becomes angry if you send a length 0 message, so just
+ * shove a nul terminator on the end
+ */
+ struct write_req req = { "", 1};
+ struct xsd_sockmsg *rep;
+ char *err;
+
+ rep = xenbus_msg_reply(XS_TRANSACTION_START, 0, &req, 1);
+ err = errmsg(rep);
+ if (err)
+ return err;
+ sscanf((char *)(rep + 1), "%lu", xbt);
+ free(rep);
+ return NULL;
+}
+
+char *xenbus_transaction_end(xenbus_transaction_t t, int abort, int *retry)
+{
+ struct xsd_sockmsg *rep;
+ struct write_req req;
+ char *err;
+
+ *retry = 0;
+
+ req.data = abort ? "F" : "T";
+ req.len = 2;
+ rep = xenbus_msg_reply(XS_TRANSACTION_END, t, &req, 1);
+ err = errmsg(rep);
+ if (err) {
+ if (!strcmp(err, "EAGAIN")) {
+ *retry = 1;
+ free(err);
+ return NULL;
+ } else {
+ return err;
+ }
+ }
+ free(rep);
+ return NULL;
+}
+
+int xenbus_read_integer(const char *path)
+{
+ char *res, *buf;
+ int t;
+
+ res = xenbus_read(XBT_NIL, path, &buf);
+ if (res) {
+ printk("Failed to read %s.\n", path);
+ free(res);
+ return -1;
+ }
+ sscanf(buf, "%d", &t);
+ free(buf);
+ return t;
+}
+
+int xenbus_read_uuid(const char *path, unsigned char uuid[16])
+{
+ char *res, *buf;
+
+ res = xenbus_read(XBT_NIL, path, &buf);
+ if (res) {
+ printk("Failed to read %s.\n", path);
+ free(res);
+ return 0;
+ }
+ if (strlen(buf) != ((2 * 16) + 4) /* 16 hex bytes and 4 hyphens */
+ || sscanf(buf,
+ "%2hhx%2hhx%2hhx%2hhx-"
+ "%2hhx%2hhx-"
+ "%2hhx%2hhx-"
+ "%2hhx%2hhx-"
+ "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
+ uuid, uuid + 1, uuid + 2, uuid + 3,
+ uuid + 4, uuid + 5, uuid + 6, uuid + 7,
+ uuid + 8, uuid + 9, uuid + 10, uuid + 11,
+ uuid + 12, uuid + 13, uuid + 14, uuid + 15) != 16) {
+ printk("Xenbus path %s value %s is not a uuid!\n", path, buf);
+ free(buf);
+ return 0;
+ }
+ free(buf);
+ return 1;
+}
+
+char *xenbus_printf(xenbus_transaction_t xbt,
+ const char *node, const char *path,
+ const char *fmt, ...)
+{
+#define BUFFER_SIZE 256
+ char fullpath[BUFFER_SIZE];
+ char val[BUFFER_SIZE];
+ va_list args;
+
+ BUG_ON(strlen(node) + strlen(path) + 1 >= BUFFER_SIZE);
+ sprintf(fullpath, "%s/%s", node, path);
+ va_start(args, fmt);
+ vsprintf(val, fmt, args);
+ va_end(args);
+ return xenbus_write(xbt, fullpath, val);
+}
+
+domid_t xenbus_get_self_id(void)
+{
+ char *dom_id;
+ domid_t ret;
+
+ BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
+ sscanf(dom_id, "%"SCNd16, &ret);
+
+ return ret;
+}
+
+void init_xenbus(void)
+{
+ u64 v;
+
+ debug("%s\n", __func__);
+ if (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v))
+ BUG();
+ xenbus_evtchn = v;
+
+ if (hvm_get_parameter(HVM_PARAM_STORE_PFN, &v))
+ BUG();
+ xenstore_buf = (struct xenstore_domain_interface *)map_frame_virt(v);
+}
+
+void fini_xenbus(void)
+{
+ debug("%s\n", __func__);
+}
diff --git a/include/blk.h b/include/blk.h
index abcd4bedbbb..9ee10fb80e7 100644
--- a/include/blk.h
+++ b/include/blk.h
@@ -33,6 +33,7 @@ enum if_type {
IF_TYPE_HOST,
IF_TYPE_NVME,
IF_TYPE_EFI,
+ IF_TYPE_PVBLOCK,
IF_TYPE_VIRTIO,
IF_TYPE_COUNT, /* Number of interface types */
diff --git a/include/configs/aspeed-common.h b/include/configs/aspeed-common.h
index 1295a6cd19c..df0f5d2e76f 100644
--- a/include/configs/aspeed-common.h
+++ b/include/configs/aspeed-common.h
@@ -7,22 +7,24 @@
* (C) Copyright 2016 Google, Inc
*/
-#ifndef __AST_COMMON_CONFIG_H
-#define __AST_COMMON_CONFIG_H
+#ifndef _ASPEED_COMMON_CONFIG_H
+#define _ASPEED_COMMON_CONFIG_H
+
+#include <asm/arch/platform.h>
/* Misc CPU related */
#define CONFIG_CMDLINE_TAG
#define CONFIG_SETUP_MEMORY_TAGS
#define CONFIG_INITRD_TAG
-#define CONFIG_SYS_SDRAM_BASE 0x80000000
+#define CONFIG_SYS_SDRAM_BASE ASPEED_DRAM_BASE
#ifdef CONFIG_PRE_CON_BUF_SZ
-#define CONFIG_SYS_INIT_RAM_ADDR (0x1e720000 + CONFIG_PRE_CON_BUF_SZ)
-#define CONFIG_SYS_INIT_RAM_SIZE (36*1024 - CONFIG_PRE_CON_BUF_SZ)
+#define CONFIG_SYS_INIT_RAM_ADDR (ASPEED_SRAM_BASE + CONFIG_PRE_CON_BUF_SZ)
+#define CONFIG_SYS_INIT_RAM_SIZE (ASPEED_SRAM_SIZE - CONFIG_PRE_CON_BUF_SZ)
#else
-#define CONFIG_SYS_INIT_RAM_ADDR (0x1e720000)
-#define CONFIG_SYS_INIT_RAM_SIZE (36*1024)
+#define CONFIG_SYS_INIT_RAM_ADDR (ASPEED_SRAM_BASE)
+#define CONFIG_SYS_INIT_RAM_SIZE (ASPEED_SRAM_SIZE)
#endif
#define SYS_INIT_RAM_END (CONFIG_SYS_INIT_RAM_ADDR \
@@ -45,8 +47,6 @@
* Miscellaneous configurable options
*/
-#define CONFIG_BOOTCOMMAND "bootm 20080000 20300000"
-
#define CONFIG_EXTRA_ENV_SETTINGS \
"verify=yes\0" \
"spi_dma=yes\0" \
diff --git a/include/configs/xenguest_arm64.h b/include/configs/xenguest_arm64.h
new file mode 100644
index 00000000000..db3059a82c6
--- /dev/null
+++ b/include/configs/xenguest_arm64.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * (C) Copyright 2020 EPAM Systems Inc.
+ */
+#ifndef __XENGUEST_ARM64_H
+#define __XENGUEST_ARM64_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
+#define CONFIG_BOARD_EARLY_INIT_F
+
+#define CONFIG_EXTRA_ENV_SETTINGS
+
+#undef CONFIG_NR_DRAM_BANKS
+#undef CONFIG_SYS_SDRAM_BASE
+
+#define CONFIG_NR_DRAM_BANKS 1
+
+/*
+ * This can be any arbitrary address as we are using PIE, but
+ * note that CONFIG_SYS_TEXT_BASE must match the address below.
+ */
+#define CONFIG_SYS_LOAD_ADDR 0x40000000
+#define CONFIG_LNX_KRNL_IMG_TEXT_OFFSET_BASE CONFIG_SYS_LOAD_ADDR
+
+/* Size of malloc() pool */
+#define CONFIG_SYS_MALLOC_LEN (32 * 1024 * 1024)
+
+/* Monitor Command Prompt */
+#define CONFIG_SYS_PROMPT_HUSH_PS2 "> "
+#define CONFIG_SYS_CBSIZE 1024
+#define CONFIG_SYS_MAXARGS 64
+#define CONFIG_SYS_BARGSIZE CONFIG_SYS_CBSIZE
+#define CONFIG_SYS_PBSIZE (CONFIG_SYS_CBSIZE + \
+ sizeof(CONFIG_SYS_PROMPT) + 16)
+
+#define CONFIG_OF_SYSTEM_SETUP
+
+#define CONFIG_CMDLINE_TAG 1
+#define CONFIG_INITRD_TAG 1
+
+#define CONFIG_CMD_RUN
+
+#undef CONFIG_EXTRA_ENV_SETTINGS
+#define CONFIG_EXTRA_ENV_SETTINGS \
+ "loadimage=ext4load pvblock 0 0x90000000 /boot/Image;\0" \
+ "pvblockboot=run loadimage;" \
+ "booti 0x90000000 - 0x88000000;\0"
+
+#endif /* __XENGUEST_ARM64_H */
diff --git a/include/dm/uclass-id.h b/include/dm/uclass-id.h
index dbc14ec3429..4ec5fa6670a 100644
--- a/include/dm/uclass-id.h
+++ b/include/dm/uclass-id.h
@@ -123,6 +123,7 @@ enum uclass_id {
UCLASS_W1, /* Dallas 1-Wire bus */
UCLASS_W1_EEPROM, /* one-wire EEPROMs */
UCLASS_WDT, /* Watchdog Timer driver */
+ UCLASS_PVBLOCK, /* Xen virtual block device */
UCLASS_COUNT,
UCLASS_INVALID = -1,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 712eeaef4ed..363b2b94255 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1,12 +1,20 @@
#ifndef _LINUX_COMPAT_H_
#define _LINUX_COMPAT_H_
+#include <console.h>
#include <log.h>
#include <malloc.h>
+
+#include <asm/processor.h>
+
#include <linux/types.h>
#include <linux/err.h>
#include <linux/kernel.h>
+#ifdef CONFIG_XEN
+#include <xen/events.h>
+#endif
+
struct unused {};
typedef struct unused unused_t;
@@ -122,6 +130,52 @@ static inline void kmem_cache_destroy(struct kmem_cache *cachep)
#define add_wait_queue(...) do { } while (0)
#define remove_wait_queue(...) do { } while (0)
+#ifndef CONFIG_XEN
+#define eventchn_poll()
+#endif
+
+#define __wait_event_timeout(condition, timeout, ret) \
+({ \
+ ulong __ret = ret; /* explicit shadow */ \
+ ulong start = get_timer(0); \
+ for (;;) { \
+ eventchn_poll(); \
+ if (condition) { \
+ __ret = 1; \
+ break; \
+ } \
+ if ((get_timer(start) > timeout) || ctrlc()) { \
+ __ret = 0; \
+ break; \
+ } \
+ cpu_relax(); \
+ } \
+ __ret; \
+})
+
+/**
+ * wait_event_timeout() - Wait until the event occurs before the timeout.
+ * @wq_head: The wait queue to wait on.
+ * @condition: Expression for the event to wait for.
+ * @timeout: Maximum waiting time.
+ *
+ * Wait until the @condition evaluates to %true (success) or the
+ * @timeout elapses (failure).
+ *
+ * Return:
+ * 0 - if the @condition evaluated to %false after the @timeout elapsed
+ * 1 - if the @condition evaluated to %true
+ */
+#define wait_event_timeout(wq_head, condition, timeout) \
+({ \
+ ulong __ret; \
+ if (condition) \
+ __ret = 1; \
+ else \
+ __ret = __wait_event_timeout(condition, timeout, __ret);\
+ __ret; \
+})
+
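+/*
+ * Illustrative usage sketch: this mirrors how the Xen bus code waits for a
+ * response producer index to move; the 5000 ms value is only an example.
+ *
+ *	if (!wait_event_timeout(NULL, prod != ring->rsp_prod, 5000))
+ *		printk("timed out waiting for a response\n");
+ */
+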
#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
/* This is also defined in ARMv8's mmu.h */
diff --git a/include/pci.h b/include/pci.h
index 281f3539168..2089db9f16f 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -1315,7 +1315,8 @@ struct udevice *pci_get_controller(struct udevice *dev);
*/
int pci_get_regions(struct udevice *dev, struct pci_region **iop,
struct pci_region **memp, struct pci_region **prefp);
-
+int
+pci_get_dma_regions(struct udevice *dev, struct pci_region *memp, int index);
/**
* dm_pci_write_bar32() - Write the address of a BAR
*
diff --git a/include/pvblock.h b/include/pvblock.h
new file mode 100644
index 00000000000..1023a6ab3bd
--- /dev/null
+++ b/include/pvblock.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * (C) 2020 EPAM Systems Inc.
+ */
+
+#ifndef _PVBLOCK_H
+#define _PVBLOCK_H
+
+/**
+ * pvblock_init() - Initialize para-virtual block device class driver
+ *
+ * Bind PV block to UCLASS_ROOT device and probe all UCLASS_PVBLOCK
+ * virtual block devices.
+ */
+void pvblock_init(void);
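+
+/*
+ * Usage sketch (the exact call site is an assumption): pvblock_init() is
+ * meant to be called once, late in the board start-up sequence, after the
+ * Xen drivers have been initialized, e.g.
+ *
+ *	xen_init();
+ *	...
+ *	pvblock_init();
+ */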
+
+#endif /* _PVBLOCK_H */
diff --git a/include/vsprintf.h b/include/vsprintf.h
index d9fb68add0c..2290083eba4 100644
--- a/include/vsprintf.h
+++ b/include/vsprintf.h
@@ -234,4 +234,12 @@ char *strmhz(char *buf, unsigned long hz);
*/
void str_to_upper(const char *in, char *out, size_t len);
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: formatting of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char *buf, const char *fmt, ...);
+
#endif
diff --git a/include/xen.h b/include/xen.h
new file mode 100644
index 00000000000..a952a2c84b8
--- /dev/null
+++ b/include/xen.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * (C) 2020, EPAM Systems Inc.
+ */
+#ifndef __XEN_H__
+#define __XEN_H__
+
+/**
+ * xen_init() - Xen initialization
+ *
+ * Map Xen memory pages, initialize event handler and xenbus,
+ * setup the grant table.
+ */
+void xen_init(void);
+
+/**
+ * xen_fini() - Board cleanup before Linux kernel start
+ *
+ * Unmap Xen memory pages from the guest's pseudophysical
+ * address space and unbind all event channels.
+ */
+void xen_fini(void);
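+
+/*
+ * Usage sketch (exact call sites are assumptions): a Xen guest board calls
+ * xen_init() once during early board init and xen_fini() right before
+ * handing control over to the OS, so that grants and event channels are
+ * released again.
+ */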
+
+#endif /* __XEN_H__ */
diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
new file mode 100644
index 00000000000..0dbc5876f35
--- /dev/null
+++ b/include/xen/arm/interface.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Guest OS interface to ARM Xen.
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ */
+
+#ifndef _ASM_ARM_XEN_INTERFACE_H
+#define _ASM_ARM_XEN_INTERFACE_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
+#define uint64_aligned_t u64 __attribute__((aligned(8)))
+
+#define __DEFINE_GUEST_HANDLE(name, type) \
+ typedef struct { union { type * p; uint64_aligned_t q; }; } \
+ __guest_handle_ ## name
+
+#define DEFINE_GUEST_HANDLE_STRUCT(name) \
+ __DEFINE_GUEST_HANDLE(name, struct name)
+#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
+#define GUEST_HANDLE(name) __guest_handle_ ## name
+
+#define set_xen_guest_handle(hnd, val) \
+ do { \
+ if (sizeof(hnd) == 8) \
+ *(u64 *)&(hnd) = 0; \
+ (hnd).p = val; \
+ } while (0)
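+
+/*
+ * Illustrative usage sketch: guest handles are filled in before issuing a
+ * hypercall, e.g. for GNTTABOP_setup_table (the single-frame array is only
+ * an example):
+ *
+ *	struct gnttab_setup_table setup;
+ *	xen_pfn_t frames[1];
+ *
+ *	setup.dom = DOMID_SELF;
+ *	setup.nr_frames = 1;
+ *	set_xen_guest_handle(setup.frame_list, frames);
+ */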
+
+#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
+
+#ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the interface with
+ * Xen so that we can have one ABI that works for 32 and 64 bit guests.
+ * Note that this means that the xen_pfn_t type may be capable of
+ * representing pfns which the guest cannot represent in its own pfn
+ * type. However, since pfn space is controlled by the guest this is
+ * fine since it simply wouldn't be able to create any such pfns in
+ * the first place.
+ */
+typedef u64 xen_pfn_t;
+#define PRI_xen_pfn "llx"
+typedef u64 xen_ulong_t;
+#define PRI_xen_ulong "llx"
+typedef s64 xen_long_t;
+#define PRI_xen_long "llx"
+/* Guest handles for primitive C types. */
+__DEFINE_GUEST_HANDLE(uchar, unsigned char);
+__DEFINE_GUEST_HANDLE(uint, unsigned int);
+DEFINE_GUEST_HANDLE(char);
+DEFINE_GUEST_HANDLE(int);
+DEFINE_GUEST_HANDLE(void);
+DEFINE_GUEST_HANDLE(u64);
+DEFINE_GUEST_HANDLE(u32);
+DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 1
+
+struct arch_vcpu_info { };
+struct arch_shared_info { };
+
+/* TODO: Move pvclock definitions some place arch independent */
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+/* It is OK to have a 12 bytes struct with no padding because it is packed */
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+ u32 sec_hi;
+} __attribute__((__packed__));
+#endif
+
+#endif /* _ASM_ARM_XEN_INTERFACE_H */
diff --git a/include/xen/events.h b/include/xen/events.h
new file mode 100644
index 00000000000..82bd18b48c8
--- /dev/null
+++ b/include/xen/events.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ * (C) 2020 - EPAM Systems Inc.
+ *
+ * File: events.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ *
+ * Date: Jul 2003, changes Jun 2005
+ *
+ * Description: Deals with events on the event channels
+ */
+#ifndef _EVENTS_H_
+#define _EVENTS_H_
+
+#include <asm/xen/hypercall.h>
+#include <xen/interface/event_channel.h>
+
+void init_events(void);
+void fini_events(void);
+
+int do_event(evtchn_port_t port, struct pt_regs *regs);
+void unbind_evtchn(evtchn_port_t port);
+void unbind_all_ports(void);
+int evtchn_alloc_unbound(domid_t pal,
+ void (*handler)(evtchn_port_t, struct pt_regs *, void *),
+ void *data, evtchn_port_t *port);
+
+/* Send notification via event channel */
+static inline int notify_remote_via_evtchn(evtchn_port_t port)
+{
+ struct evtchn_send op;
+
+ op.port = port;
+ return HYPERVISOR_event_channel_op(EVTCHNOP_send, &op);
+}
+
+void eventchn_poll(void);
+
+#endif /* _EVENTS_H_ */
diff --git a/include/xen/gnttab.h b/include/xen/gnttab.h
new file mode 100644
index 00000000000..db1d5361fed
--- /dev/null
+++ b/include/xen/gnttab.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * (C) 2006, Steven Smith <sos22@cam.ac.uk>
+ * (C) 2006, Grzegorz Milos <gm281@cam.ac.uk>
+ * (C) 2020, EPAM Systems Inc.
+ */
+#ifndef __GNTTAB_H__
+#define __GNTTAB_H__
+
+#include <xen/interface/grant_table.h>
+
+void init_gnttab(void);
+void fini_gnttab(void);
+
+grant_ref_t gnttab_alloc_and_grant(void **map);
+grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame,
+ int readonly);
+int gnttab_end_access(grant_ref_t ref);
+const char *gnttabop_error(int16_t status);
+
+void get_gnttab_base(phys_addr_t *gnttab_base, phys_size_t *gnttab_sz);
+
+#endif /* !__GNTTAB_H__ */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
new file mode 100644
index 00000000000..f02c0798a6f
--- /dev/null
+++ b/include/xen/hvm.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Simple wrappers around HVM functions
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ * Copyright (c) 2005, Grzegorz Milos, gm281@cam.ac.uk,Intel Research Cambridge
+ * Copyright (c) 2020, EPAM Systems Inc.
+ */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <asm/xen/hypercall.h>
+#include <xen/interface/hvm/params.h>
+#include <xen/interface/xen.h>
+
+extern struct shared_info *HYPERVISOR_shared_info;
+
+int hvm_get_parameter(int idx, uint64_t *value);
+int hvm_get_parameter_maintain_dcache(int idx, uint64_t *value);
+
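+/*
+ * Illustrative usage sketch: hvm_get_parameter() returns 0 on success, so
+ * the Xen store event channel can be fetched like this:
+ *
+ *	uint64_t v;
+ *
+ *	if (!hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v))
+ *		xenbus_evtchn = v;
+ */
+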
+struct shared_info *map_shared_info(void *p);
+void do_hypervisor_callback(struct pt_regs *regs);
+void mask_evtchn(uint32_t port);
+void unmask_evtchn(uint32_t port);
+void clear_evtchn(uint32_t port);
+
+#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
new file mode 100644
index 00000000000..b0e35987591
--- /dev/null
+++ b/include/xen/interface/event_channel.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+#include <xen/interface/xen.h>
+
+typedef u32 evtchn_port_t;
+DEFINE_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_alloc_unbound 6
+struct evtchn_alloc_unbound {
+ /* IN parameters */
+ domid_t dom, remote_dom;
+ /* OUT parameters */
+ evtchn_port_t port;
+};
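+
+/*
+ * Illustrative usage sketch (remote domain 0 is only an example): allocate
+ * an unbound port in the local domain for dom0 to bind to.
+ *
+ *	struct evtchn_alloc_unbound op;
+ *
+ *	op.dom = DOMID_SELF;
+ *	op.remote_dom = 0;
+ *	if (!HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op))
+ *		port = op.port;
+ */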
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_bind_interdomain 0
+struct evtchn_bind_interdomain {
+ /* IN parameters. */
+ domid_t remote_dom;
+ evtchn_port_t remote_port;
+ /* OUT parameters. */
+ evtchn_port_t local_port;
+};
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ * 1. A virtual IRQ may be bound to at most one event channel per vcpu.
+ * 2. The allocated event channel is bound to the specified vcpu. The binding
+ * may not be changed.
+ */
+#define EVTCHNOP_bind_virq 1
+struct evtchn_bind_virq {
+ /* IN parameters. */
+ u32 virq;
+ u32 vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
+ * NOTES:
+ * 1. A physical IRQ may be bound to at most one event channel per domain.
+ * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+#define EVTCHNOP_bind_pirq 2
+struct evtchn_bind_pirq {
+ /* IN parameters. */
+ u32 pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+ u32 flags; /* BIND_PIRQ__* */
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ * 1. The allocated event channel is bound to the specified vcpu. The binding
+ * may not be changed.
+ */
+#define EVTCHNOP_bind_ipi 7
+struct evtchn_bind_ipi {
+ u32 vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+#define EVTCHNOP_close 3
+struct evtchn_close {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+#define EVTCHNOP_send 4
+struct evtchn_send {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may obtain the status of an event
+ * channel for which <dom> is not DOMID_SELF.
+ */
+#define EVTCHNOP_status 5
+struct evtchn_status {
+ /* IN parameters */
+ domid_t dom;
+ evtchn_port_t port;
+ /* OUT parameters */
+#define EVTCHNSTAT_closed 0 /* Channel is not in use. */
+#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/
+#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
+#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
+ u32 status;
+ u32 vcpu; /* VCPU to which this channel is bound. */
+ union {
+ struct {
+ domid_t dom;
+ } unbound; /* EVTCHNSTAT_unbound */
+ struct {
+ domid_t dom;
+ evtchn_port_t port;
+ } interdomain; /* EVTCHNSTAT_interdomain */
+ u32 pirq; /* EVTCHNSTAT_pirq */
+ u32 virq; /* EVTCHNSTAT_virq */
+ } u;
+};
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
+ * the binding. This binding cannot be changed.
+ * 2. All other channels notify vcpu0 by default. This default is set when
+ * the channel is allocated (a port that is freed and subsequently reused
+ * has its binding reset to vcpu0).
+ */
+#define EVTCHNOP_bind_vcpu 8
+struct evtchn_bind_vcpu {
+ /* IN parameters. */
+ evtchn_port_t port;
+ u32 vcpu;
+};
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+#define EVTCHNOP_unmask 9
+struct evtchn_unmask {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset 10
+struct evtchn_reset {
+ /* IN parameters. */
+ domid_t dom;
+};
+
+/*
+ * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
+ */
+#define EVTCHNOP_init_control 11
+struct evtchn_init_control {
+ /* IN parameters. */
+ u64 control_gfn;
+ u32 offset;
+ u32 vcpu;
+ /* OUT parameters. */
+ u8 link_bits;
+ u8 _pad[7];
+};
+
+/*
+ * EVTCHNOP_expand_array: add an additional page to the event array.
+ */
+#define EVTCHNOP_expand_array 12
+struct evtchn_expand_array {
+ /* IN parameters. */
+ u64 array_gfn;
+};
+
+/*
+ * EVTCHNOP_set_priority: set the priority for an event channel.
+ */
+#define EVTCHNOP_set_priority 13
+struct evtchn_set_priority {
+ /* IN parameters. */
+ evtchn_port_t port;
+ u32 priority;
+};
+
+struct evtchn_op {
+ u32 cmd; /* EVTCHNOP_* */
+ union {
+ struct evtchn_alloc_unbound alloc_unbound;
+ struct evtchn_bind_interdomain bind_interdomain;
+ struct evtchn_bind_virq bind_virq;
+ struct evtchn_bind_pirq bind_pirq;
+ struct evtchn_bind_ipi bind_ipi;
+ struct evtchn_close close;
+ struct evtchn_send send;
+ struct evtchn_status status;
+ struct evtchn_bind_vcpu bind_vcpu;
+ struct evtchn_unmask unmask;
+ } u;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
+
+/*
+ * 2-level ABI
+ */
+
+#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+
+/*
+ * FIFO ABI
+ */
+
+/* Events may have priorities from 0 (highest) to 15 (lowest). */
+#define EVTCHN_FIFO_PRIORITY_MAX 0
+#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
+#define EVTCHN_FIFO_PRIORITY_MIN 15
+
+#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
+
+typedef u32 event_word_t;
+
+#define EVTCHN_FIFO_PENDING 31
+#define EVTCHN_FIFO_MASKED 30
+#define EVTCHN_FIFO_LINKED 29
+#define EVTCHN_FIFO_BUSY 28
+
+#define EVTCHN_FIFO_LINK_BITS 17
+#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
+
+#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
+
+struct evtchn_fifo_control_block {
+ u32 ready;
+ u32 _rsvd;
+ event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
new file mode 100644
index 00000000000..4acd4bd193b
--- /dev/null
+++ b/include/xen/interface/grant_table.h
@@ -0,0 +1,565 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+#include <xen/interface/xen.h>
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ * compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ * This cannot be done directly. Request assistance from the domain controller
+ * which can set a timeout on the use of a grant entry and take necessary
+ * action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & GTF_transfer_committed). [*]
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ * The guest must /not/ modify the grant entry until the address of the
+ * transferred frame is written. It is safe for the guest to spin waiting
+ * for this to occur (detect by observing GTF_transfer_completed in
+ * ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ * 1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ * Use SMP-safe bit-setting instruction.
+ */
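+
+/*
+ * Illustrative sketch of the "introducing a valid entry" steps above for a
+ * GTF_permit_access entry (ent is assumed to point into the shared table):
+ *
+ *	ent->domid = domid;
+ *	ent->frame = frame;
+ *	wmb();
+ *	ent->flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+ */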
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef u32 grant_ref_t;
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility. New guests should use version 2.
+ */
+struct grant_entry_v1 {
+ /* GTF_xxx: various type and flag information. [XEN,GST] */
+ u16 flags;
+ /* The domain being granted foreign privileges. [GST] */
+ domid_t domid;
+ /*
+ * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+ * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+ */
+ u32 frame;
+};
+
+/*
+ * Type of grant entry.
+ * GTF_invalid: This grant entry grants no privileges.
+ * GTF_permit_access: Allow @domid to map/access @frame.
+ * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ * to this guest. Xen writes the page number to @frame.
+ * GTF_transitive: Allow @domid to transitively access a subrange of
+ * @trans_grant in @trans_domid. No mappings are allowed.
+ */
+#define GTF_invalid (0U << 0)
+#define GTF_permit_access (1U << 0)
+#define GTF_accept_transfer (2U << 0)
+#define GTF_transitive (3U << 0)
+#define GTF_type_mask (3U << 0)
+
+/*
+ * Subflags for GTF_permit_access.
+ * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_sub_page: Grant access to only a subrange of the page. @domid
+ * will only be allowed to copy from the grant, and not
+ * map it. [GST]
+ */
+#define _GTF_readonly (2)
+#define GTF_readonly (1U << _GTF_readonly)
+#define _GTF_reading (3)
+#define GTF_reading (1U << _GTF_reading)
+#define _GTF_writing (4)
+#define GTF_writing (1U << _GTF_writing)
+#define _GTF_sub_page (8)
+#define GTF_sub_page (1U << _GTF_sub_page)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ * to transferring ownership of a page frame. When a guest sees this flag
+ * it must /not/ modify the grant entry until GTF_transfer_completed is
+ * set by Xen.
+ * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ * after reading GTF_transfer_committed. Xen will always write the frame
+ * address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed (1U << _GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed (1U << _GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+ u16 flags;
+ domid_t domid;
+};
+
+/*
+ * Version 2 of the grant entry structure. It is a union because three
+ * different types are supported: full_page, sub_page and transitive.
+ */
+union grant_entry_v2 {
+ struct grant_entry_header hdr;
+
+ /*
+ * This member is used for V1-style full page grants, where either:
+ *
+ * -- hdr.type is GTF_accept_transfer, or
+ * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+ *
+ * In that case, the frame field has the same semantics as the
+ * field of the same name in the V1 entry structure.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ u32 pad0;
+ u64 frame;
+ } full_page;
+
+ /*
+ * If the grant type is GTF_grant_access and GTF_sub_page is set,
+ * @domid is allowed to access bytes [@page_off,@page_off+@length)
+ * in frame @frame.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ u16 page_off;
+ u16 length;
+ u64 frame;
+ } sub_page;
+
+ /*
+ * If the grant is GTF_transitive, @domid is allowed to use the
+ * grant @gref in domain @trans_domid, as if it was the local
+ * domain. Obviously, the transitive access must be compatible
+ * with the original grant.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ domid_t trans_domid;
+ u16 pad0;
+ grant_ref_t gref;
+ } transitive;
+
+ u32 __spacer[4]; /* Pad to a power of two */
+};
+
+typedef u16 grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef u32 grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ * via which I/O devices may access the granted frame.
+ * 2. If GNTMAP_host_map is specified then a mapping will be added at
+ * either a host virtual address in the current address space, or at
+ * a PTE at the specified machine address. The type of mapping to
+ * perform is selected through the GNTMAP_contains_pte flag, and the
+ * address is specified in <host_addr>.
+ * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ * host mapping is destroyed by other means then it is *NOT* guaranteed
+ * to be accounted to the correct grant reference!
+ */
+#define GNTTABOP_map_grant_ref 0
+struct gnttab_map_grant_ref {
+ /* IN parameters. */
+ u64 host_addr;
+ u32 flags; /* GNTMAP_* */
+ grant_ref_t ref;
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+ grant_handle_t handle;
+ u64 dev_bus_addr;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 3. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_grant_ref 1
+struct gnttab_unmap_grant_ref {
+ /* IN parameters. */
+ u64 host_addr;
+ u64 dev_bus_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ * 3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table 2
+struct gnttab_setup_table {
+ /* IN parameters. */
+ domid_t dom;
+ u32 nr_frames;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+
+ GUEST_HANDLE(xen_pfn_t)frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table 3
+struct gnttab_dump_table {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
+
+/*
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+#define GNTTABOP_transfer 4
+struct gnttab_transfer {
+ /* IN parameters. */
+ xen_pfn_t mfn;
+ domid_t domid;
+ grant_ref_t ref;
+ /* OUT parameters. */
+ s16 status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
+
+/*
+ * GNTTABOP_copy: Hypervisor-based copy.
+ * Source and destination can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref (0)
+#define GNTCOPY_source_gref (1 << _GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref (1)
+#define GNTCOPY_dest_gref (1 << _GNTCOPY_dest_gref)
+
+#define GNTTABOP_copy 5
+struct gnttab_copy {
+ /* IN parameters. */
+ struct {
+ union {
+ grant_ref_t ref;
+ xen_pfn_t gmfn;
+ } u;
+ domid_t domid;
+ u16 offset;
+ } source, dest;
+ u16 len;
+ u16 flags; /* GNTCOPY_* */
+ /* OUT parameters. */
+ s16 status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size 6
+struct gnttab_query_size {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ u32 nr_frames;
+ u32 max_nr_frames;
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>. <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace 7
+struct gnttab_unmap_and_replace {
+ /* IN parameters. */
+ u64 host_addr;
+ u64 new_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version 8
+struct gnttab_set_version {
+ /* IN parameters */
+ u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames 9
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ u32 nr_frames;
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+
+ GUEST_HANDLE(u64)frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version 10
+struct gnttab_get_version {
+ /* IN parameters */
+ domid_t dom;
+ u16 pad;
+ /* OUT parameters */
+ u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
+
+/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+#define GNTTABOP_cache_flush 12
+struct gnttab_cache_flush {
+ union {
+ u64 dev_bus_addr;
+ grant_ref_t ref;
+ } a;
+ u16 offset; /* offset from start of grant */
+ u16 length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN (1 << 0)
+#define GNTTAB_CACHE_INVAL (1 << 1)
+#define GNTTAB_CACHE_SOURCE_GREF (1 << 31)
+ u32 op;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
+
+/*
+ * Bitfield values for update_pin_status.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map (0)
+#define GNTMAP_device_map (1 << _GNTMAP_device_map)
+/* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map (1)
+#define GNTMAP_host_map (1 << _GNTMAP_host_map)
+/* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly (2)
+#define GNTMAP_readonly (1 << _GNTMAP_readonly)
+/*
+ * GNTMAP_host_map subflag:
+ * 0 => The host mapping is usable only by the guest OS.
+ * 1 => The host mapping is usable by guest OS + current application.
+ */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map (1 << _GNTMAP_application_map)
+
+/*
+ * GNTMAP_contains_pte subflag:
+ * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte (4)
+#define GNTMAP_contains_pte (1 << _GNTMAP_contains_pte)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0 (16)
+#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+#define GNTST_okay (0) /* Normal return. */
+#define GNTST_general_error (-1) /* General undefined error. */
+#define GNTST_bad_domain (-2) /* Unrecognised domain id. */
+#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
+#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
+#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+#define GNTST_eagain (-12) /* Operation not done; try again. */
+
+#define GNTTABOP_error_msgs { \
+ "okay", \
+ "undefined error", \
+ "unrecognised domain id", \
+ "invalid grant reference", \
+ "invalid mapping handle", \
+ "invalid virtual address", \
+ "invalid device address", \
+ "no spare translation slot in the I/O MMU", \
+ "permission denied", \
+ "bad page", \
+ "copy arguments cross page boundary", \
+ "page address size too large", \
+ "operation not done; try again" \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 00000000000..605d943fb1b
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * hvm_op.h
+ *
+ * Copyright (c) 2007, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: the second argument of the hypercall is a
+ * pointer to a xen_hvm_param struct.
+ */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
+ domid_t domid; /* IN */
+ u32 index; /* IN */
+ u64 value; /* IN/OUT */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying 9
+struct xen_hvm_pagetable_dying {
+ /* Domain with a pagetable about to be destroyed. */
+ domid_t domid;
+ /* guest physical address of the toplevel pagetable dying */
+ aligned_u64 gpa;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying);
+
+enum hvmmem_type_t {
+ HVMMEM_ram_rw, /* Normal read/write guest RAM */
+ HVMMEM_ram_ro, /* Read-only; writes are discarded */
+ HVMMEM_mmio_dm, /* Reads and writes go to the device model */
+};
+
+#define HVMOP_get_mem_type 15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+ /* Domain to be queried. */
+ domid_t domid;
+ /* OUT variable. */
+ u16 mem_type;
+ u16 pad[2]; /* align next field on 8-byte boundary */
+ /* IN variable. */
+ u64 pfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 00000000000..a81bb5e7c7a
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * params.h
+ *
+ * HVM parameters. HVM (Hardware Virtual Machine) is the type of instance
+ * that mimics a bare-metal server setup and provides better hardware
+ * isolation.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include <xen/interface/hvm/hvm_op.h>
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+#define HVM_PARAM_CALLBACK_IRQ 0
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ *
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ * If val != 0, val[63:56] encodes the type, as follows:
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_GSI 0
+/*
+ * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0,
+ * and disables all notifications.
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
+/*
+ * val[55:0] is a delivery PCI INTx line:
+ * Domain = val[47:32], Bus = val[31:16], DevFn = val[15:8], IntX = val[1:0]
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2
+/*
+ * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know
+ * if this delivery method is available.
+ */
+#elif defined(__arm__) || defined(__aarch64__)
+#define HVM_PARAM_CALLBACK_TYPE_PPI 2
+/*
+ * val[55:16] needs to be zero.
+ * val[15:8] is interrupt flag of the PPI used by event-channel:
+ * bit 8: the PPI is edge(1) or level(0) triggered
+ * bit 9: the PPI is active low(1) or high(0)
+ * val[7:0] is a PPI number used by event-channel.
+ * This is only used by ARM/ARM64; masking/EOI of the interrupt associated
+ * with the notification is handled by the interrupt controller.
+ */
+#endif
+
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED 4
+
+#define HVM_PARAM_IOREQ_PFN 5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
+ * Do not advance a vcpu's time beyond the correct delivery time for
+ * interrupts that have been missed due to preemption. Deliver missed
+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ * time stepwise for each one.
+ * no_delay_for_missed_ticks:
+ * As above, missed interrupts are delivered, but guest time always tracks
+ * wallclock (i.e., real) time while doing so.
+ * no_missed_ticks_pending:
+ * No missed interrupts are held pending. Instead, to ensure ticks are
+ * delivered at some non-zero rate, if we detect missed ticks then the
+ * internal tick alarm is not disabled if the VCPU is preempted during the
+ * next tick period.
+ * one_missed_tick_pending:
+ * Missed interrupts are collapsed together and delivered as one 'late tick'.
+ * Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE 10
+#define HVMPTM_delay_for_missed_ticks 0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending 2
+#define HVMPTM_one_missed_tick_pending 3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN 17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+#define HVM_NR_PARAMS 19
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
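
The HVM_PARAM_* indexes above are read and written through the HVMOP_get_param /
HVMOP_set_param subcommands from hvm_op.h. A minimal sketch of a getter, assuming
a HYPERVISOR_hvm_op() hypercall wrapper and DOMID_SELF are available elsewhere in
this series (the helper name is illustrative):

    static int hvm_get_parameter(int idx, u64 *value)
    {
        struct xen_hvm_param xhv;
        int ret;

        xhv.domid = DOMID_SELF;  /* query our own domain */
        xhv.index = idx;         /* e.g. HVM_PARAM_STORE_EVTCHN */
        ret = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
        if (ret < 0)
            return ret;

        *value = xhv.value;
        return 0;
    }
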
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
new file mode 100644
index 00000000000..38b4d7c73bb
--- /dev/null
+++ b/include/xen/interface/io/blkif.h
@@ -0,0 +1,701 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t u16
+#endif
+#define blkif_sector_t u64
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formatted node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ * Values: "r" (read only), "w" (writable)
+ *
+ * The read or write access permissions to the backing store to be
+ * granted to the frontend.
+ *
+ * params
+ * Values: string
+ *
+ * A free-form string providing sufficient information for the
+ * hotplug script to attach the device and provide a suitable
+ * handler (i.e. a block device) for blkback to use.
+ *
+ * physical-device
+ * Values: "MAJOR:MINOR"
+ * Notes: 11
+ *
+ * MAJOR and MINOR are the major number and minor number of the
+ * backing device respectively.
+ *
+ * physical-device-path
+ * Values: path string
+ *
+ * A string that contains the absolute path to the disk image. On
+ * NetBSD and Linux this is always a block device, while on FreeBSD
+ * it can be either a block device or a regular file.
+ *
+ * type
+ * Values: "file", "phy", "tap"
+ *
+ * The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * The underlying storage is not affected by the direct IO memory
+ * lifetime bug. See:
+ * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ * Therefore this option gives the backend permission to use
+ * O_DIRECT, notwithstanding that bug.
+ *
+ * That is, if this option is enabled, use of O_DIRECT is safe,
+ * in circumstances where we would normally have avoided it as a
+ * workaround for that bug. This option is not relevant for all
+ * backends, and even not necessarily supported for those for
+ * which it is relevant. A backend which knows that it is not
+ * affected by the bug can ignore this option.
+ *
+ * This option doesn't require a backend to use O_DIRECT, so it
+ * should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_DISCARD request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7
+ *
+ * A value of "1" indicates that the backend can keep the grants used
+ * by the frontend driver mapped, so the same set of grants should be
+ * used in all transactions. The maximum number of grants the backend
+ * can map persistently depends on the implementation, but ideally it
+ * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ * feature the backend doesn't need to unmap each grant, preventing
+ * costly TLB flushes. The backend driver should only map grants
+ * persistently if the frontend supports it. If a backend driver chooses
+ * to use the persistent protocol when the frontend doesn't support it,
+ * it will probably hit the maximum number of persistently mapped grants
+ * (due to the fact that the frontend won't be reusing the same grants),
+ * and fall back to non-persistent mode. Backend implementations may
+ * shrink or expand the number of persistently mapped grants without
+ * notifying the frontend depending on memory constraints (this might
+ * cause a performance degradation).
+ *
+ * If a backend driver wants to limit the maximum number of persistently
+ * mapped grants to a value less than RING_SIZE *
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST, an LRU strategy should be used to
+ * discard the grants that are less commonly used. Using an LRU in the
+ * backend driver paired with a LIFO queue in the frontend will
+ * allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 1, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ * etc.).
+ *
+ * max-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ * Values: 0/1 (boolean)
+ * Default Value: 1
+ *
+ * This optional property, set by the toolstack, instructs the backend
+ * to offer (or not to offer) discard to the frontend. If the property
+ * is missing the backend should offer discard if the backing storage
+ * actually supports it.
+ *
+ * discard-alignment
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 4, 5
+ *
+ * The offset, in bytes from the beginning of the virtual block device,
+ * to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ * Values: <uint32_t>
+ * Default Value: <"sector-size">
+ * Notes: 4
+ *
+ * The size, in bytes, of the individually addressable discard extents
+ * of the underlying device.
+ *
+ * discard-secure
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 10
+ *
+ * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ * requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ * Values: <uint32_t> (bitmap)
+ *
+ * A collection of bit flags describing attributes of the backing
+ * device. The VDISK_* macros define the meaning of each bit
+ * location.
+ *
+ * sector-size
+ * Values: <uint32_t>
+ *
+ * The logical block size, in bytes, of the underlying storage. This
+ * must be a power of two with a minimum value of 512.
+ *
+ * NOTE: Because of implementation bugs in some frontends this must be
+ * set to 512, unless the frontend advertises a non-zero value
+ * in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ * Values: <uint32_t>
+ * Default Value: <"sector-size">
+ *
+ * The physical block size, in bytes, of the backend storage. This
+ * must be an integer multiple of "sector-size".
+ *
+ * sectors
+ * Values: <u64>
+ *
+ * The size of the backend device, expressed in units of "sector-size".
+ * The product of "sector-size" and "sectors" must also be an integer
+ * multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: <uint32_t>
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * ring-ref
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * For a frontend providing a multi-page ring, a "number of ring pages"
+ * sized list of nodes, each containing a Xen grant reference granting
+ * permission for the backend to map the page of the ring located
+ * at page index "%u". Page indexes are zero based.
+ *
+ * protocol
+ * Values: string (XEN_IO_PROTO_ABI_*)
+ * Default Value: XEN_IO_PROTO_ABI_NATIVE
+ *
+ * The machine ABI rules governing the format of all ring request and
+ * response structures.
+ *
+ * ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ * Notes: 1, 3
+ *
+ * The size of the frontend allocated request ring buffer in units
+ * of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ * etc.).
+ *
+ * num-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The size of the frontend allocated request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7, 8, 9
+ *
+ * A value of "1" indicates that the frontend will reuse the same grants
+ * for all transactions, allowing the backend to map them with write
+ * access (even when it should be read-only). If the frontend hits the
+ * maximum number of allowed persistently mapped grants, it can fallback
+ * to non persistent mode. This will cause a performance degradation,
+ * since the backend driver will still try to map those grants
+ * persistently. Since the persistent grants protocol is compatible with
+ * the previous protocol, a frontend driver can choose to work in
+ * persistent mode even when the backend doesn't support it.
+ *
+ * It is recommended that the frontend driver stores the persistently
+ * mapped grants in a LIFO queue, so a subset of all persistently mapped
+ * grants gets used commonly. This is done in case the backend driver
+ * decides to limit the maximum number of persistently mapped grants
+ * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the frontend will correctly supply and
+ * interpret all sector-based quantities in terms of the "sector-size"
+ * value supplied in the backend info, whatever that may be set to.
+ * If this node is not present or its value is "0" then it is assumed
+ * that the frontend requires that the logical block size is 512 as it
+ * is hardcoded (which is the case in some frontend implementations).
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ * Values: "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ * Values: <uint32_t>
+ *
+ * A value indicating the physical device to virtualize within the
+ * frontend's domain. (e.g. "The first ATA disk", "The third SCSI
+ * disk", etc.)
+ *
+ * See docs/misc/vbd-interface.txt for details on the format of this
+ * value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ * PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ * including a distribution deployed on certain nodes of the Amazon
+ * EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ * in slightly different forms, by both Citrix and RedHat/Amazon.
+ * For full interoperability, block front and backends should publish
+ * identical ring parameters, adjusted for unit differences, to the
+ * XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ * (discardable extents) in units that are larger than the exported logical
+ * block size. If the backing device has such discardable extents the
+ * backend should provide both discard-granularity and discard-alignment.
+ * Providing just one of the two may be considered an error by the frontend.
+ * Backends supporting discard should include discard-granularity and
+ * discard-alignment even if they support discarding individual sectors.
+ * Frontends should assume discard-alignment == 0 and discard-granularity
+ * == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ * partitioned into virtual devices that do not necessarily begin or
+ * end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ * 'ring-ref' is used to communicate the grant reference for this
+ * page to the backend. When using a multi-page ring, the 'ring-ref'
+ * node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants, data has to be copied from/to the page
+ * where the grant is currently mapped. The overhead of doing this copy,
+ * however, doesn't negate the speed improvement of not having to unmap
+ * the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ * with write access, even when they should be mapped read-only, since
+ * further requests may reuse these grants and require write permissions.
+ * (9) Linux implementation doesn't have a limit on the maximum number of
+ * grants that can be persistently mapped in the frontend driver, but
+ * due to the frontend driver implementation it should never be bigger
+ * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ * backing device supports secure discard.
+ *(11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * feature multi-page ring buffer.
+ * For example, a frontend requesting two queues/rings, where each ring
+ * buffer is two pages in size, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ * Startup *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front Back
+ * ================================= =====================================
+ * XenbusStateInitialising XenbusStateInitialising
+ * o Query virtual device o Query backend device identification
+ * properties. data.
+ * o Setup OS device instance. o Open and validate backend device.
+ * o Publish backend features and
+ * transport parameters.
+ * |
+ * |
+ * V
+ * XenbusStateInitWait
+ *
+ * o Query backend features and
+ * transport parameters.
+ * o Allocate and initialize the
+ * request ring.
+ * o Publish transport parameters
+ * that will be in effect during
+ * this connection.
+ * |
+ * |
+ * V
+ * XenbusStateInitialised
+ *
+ * o Query frontend transport parameters.
+ * o Connect to the request ring and
+ * event channel.
+ * o Publish backend device properties.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * o Query backend device properties.
+ * o Finalize OS virtual device
+ * instance.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ * of transport parameters, can skip certain states in the state machine:
+ *
+ * o A frontend may transition to XenbusStateInitialised without
+ * waiting for the backend to enter XenbusStateInitWait. In this
+ * case, default transport parameters are in effect and any
+ * transport parameters published by the frontend must contain
+ * their default values.
+ *
+ * o A backend may transition to XenbusStateInitialised, bypassing
+ * XenbusStateInitWait, without waiting for the frontend to first
+ * enter the XenbusStateInitialised state. In this case, default
+ * transport parameters are in effect and any transport parameters
+ * published by the backend must contain their default values.
+ *
+ * Drivers that support optional features and/or transport parameter
+ * negotiation must tolerate these additional state transition paths.
+ * In general this means performing the work of any skipped state
+ * transition, if it has not already been performed, in addition to the
+ * work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request. All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional. See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER 2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional. See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE 3
+/*
+ * Used in SLES sources for a device-specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1 4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client. If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ * Interface%20manuals/100293068c.pdf
+ *
+ * Optional. See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD 5
+
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT 6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities. See the description
+ * of the "feature-large-sector-size" frontend xenbus node above for
+ * more information.
+ */
+struct blkif_request_segment {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ u8 first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+ u8 operation; /* BLKIF_OP_??? */
+ u8 nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ u64 id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+ u8 operation; /* BLKIF_OP_DISCARD */
+ u8 flag; /* BLKIF_DISCARD_SECURE or zero */
+#define BLKIF_DISCARD_SECURE (1 << 0) /* ignored if discard-secure=0 */
+ blkif_vdev_t handle; /* same as for read/write requests */
+ u64 id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk */
+ u64 nr_sectors; /* number of contiguous sectors to discard*/
+};
+
+struct blkif_request_indirect {
+ u8 operation; /* BLKIF_OP_INDIRECT */
+ u8 indirect_op; /* BLKIF_OP_{READ/WRITE} */
+ u16 nr_segments; /* number of segments */
+ u64 id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ blkif_vdev_t handle; /* same as for read/write requests */
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+ u64 pad; /* Make it 64 byte aligned on i386 */
+#endif
+};
+
+struct blkif_response {
+ u64 id; /* copied from request */
+ u8 operation; /* copied from request */
+ s16 status; /* BLKIF_RSP_??? */
+};
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY 0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM 0x1
+#define VDISK_REMOVABLE 0x2
+#define VDISK_READONLY 0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
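
To illustrate how the request structures above fit together, here is a minimal,
hypothetical sketch of queuing a one-segment BLKIF_OP_READ on a front ring.
'ring', 'handle', 'gref', 'start_sector' and 'request_id' are assumed to have
been set up during XenBus negotiation, and the push/notify step (see ring.h
below) is omitted:

    struct blkif_request *req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);

    req->operation = BLKIF_OP_READ;
    req->nr_segments = 1;
    req->handle = handle;                /* from the "virtual-device" node */
    req->id = request_id;                /* echoed back in blkif_response.id */
    req->sector_number = start_sector;   /* in units of "sector-size" */
    req->seg[0].gref = gref;             /* grant covering the data buffer */
    req->seg[0].first_sect = 0;
    req->seg[0].last_sect = 7;           /* 8 x 512-byte sectors == one 4 KiB page */
    ring.req_prod_pvt++;
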
diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h
new file mode 100644
index 00000000000..d4dccc74afa
--- /dev/null
+++ b/include/xen/interface/io/console.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef u32 XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1))
+
+struct xencons_interface {
+ char in[1024];
+ char out[2048];
+ XENCONS_RING_IDX in_cons, in_prod;
+ XENCONS_RING_IDX out_cons, out_prod;
+};
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
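
A minimal sketch of how a PV console driver pushes one character into the 'out'
ring above; 'intf' points at the shared struct xencons_interface and 'ch' is the
character to send. A write barrier before publishing out_prod and an
event-channel notification afterwards are required in a real driver and are
omitted here:

    XENCONS_RING_IDX prod = intf->out_prod;

    if ((prod - intf->out_cons) < sizeof(intf->out)) {
        intf->out[MASK_XENCONS_IDX(prod, intf->out)] = ch;
        /* xen_wmb(): publish the data before the new producer index */
        intf->out_prod = prod + 1;
    }
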
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
new file mode 100644
index 00000000000..5aa0aaa93be
--- /dev/null
+++ b/include/xen/interface/io/protocols.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * protocols.h
+ *
+ * Copyright (c) 2008, Keir Fraser
+ *
+ * Xen protocols, which are used as ABI rules governing the format of all
+ * ring request and response structures.
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
new file mode 100644
index 00000000000..3c5c87deda9
--- /dev/null
+++ b/include/xen/interface/io/ring.h
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+/*
+ * When #include'ing this header, you need to provide the following
+ * declaration upfront:
+ * - standard integer types (u8, u16, etc.)
+ * They are provided by stdint.h of the standard headers.
+ *
+ * In addition, if you intend to use the FLEX macros, you also need to
+ * provide the following, before invoking the FLEX macros:
+ * - size_t
+ * - memcpy
+ * - grant_ref_t
+ * These declarations are provided by string.h of the standard headers,
+ * and grant_table.h from the Xen public headers.
+ */
+
+#include <xen/interface/grant_table.h>
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
+#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x) >> 2) << 2 : __RD2(_x))
+#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x) >> 4) << 4 : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x) >> 8) << 8 : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x) >> 16) << 16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+ sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ * DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ * mytag_sring_t - The shared ring.
+ * mytag_front_ring_t - The 'front' half of the ring.
+ * mytag_back_ring_t - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ * mytag_front_ring_t front_ring;
+ * SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ * mytag_back_ring_t back_ring;
+ * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
+ \
+/* Shared ring entry */ \
+union __name##_sring_entry { \
+ __req_t req; \
+ __rsp_t rsp; \
+}; \
+ \
+/* Shared ring page */ \
+struct __name##_sring { \
+ RING_IDX req_prod, req_event; \
+ RING_IDX rsp_prod, rsp_event; \
+ union { \
+ struct { \
+ u8 smartpoll_active; \
+ } netif; \
+ struct { \
+ u8 msg; \
+ } tapif_user; \
+ u8 pvt_pad[4]; \
+ } pvt; \
+ u8 __pad[44]; \
+ union __name##_sring_entry ring[1]; /* variable-length */ \
+}; \
+ \
+/* "Front" end's private variables */ \
+struct __name##_front_ring { \
+ RING_IDX req_prod_pvt; \
+ RING_IDX rsp_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* "Back" end's private variables */ \
+struct __name##_back_ring { \
+ RING_IDX rsp_prod_pvt; \
+ RING_IDX req_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* Syntactic sugar */ \
+typedef struct __name##_sring __name##_sring_t; \
+typedef struct __name##_front_ring __name##_front_ring_t; \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do { \
+ (_s)->req_prod = (_s)->rsp_prod = 0; \
+ (_s)->req_event = (_s)->rsp_event = 1; \
+ (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \
+ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \
+} while (0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do { \
+ (_r)->req_prod_pvt = 0; \
+ (_r)->rsp_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do { \
+ (_r)->rsp_prod_pvt = 0; \
+ (_r)->req_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r) \
+ ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r) \
+ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r) \
+ (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+ ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \
+ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ ((_r)->req_cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
+ ((((_r)->sring->req_prod - (_r)->req_cons) < \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \
+ ((_r)->sring->req_prod - (_r)->req_cons) : \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+/*
+ * Get a local copy of a request.
+ *
+ * Use this in preference to RING_GET_REQUEST() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where _req is a struct which consists of only bitfields.
+ */
+#define RING_COPY_REQUEST(_r, _idx, _req) do { \
+ /* Use volatile to force the copy into _req. */ \
+ *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \
+} while (0)
+
+#define RING_GET_RESPONSE(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
+ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \
+ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do { \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = (_r)->req_prod_pvt; \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ * is a boolean return value. True indicates that the receiver requires an
+ * asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ * The second argument is a boolean return value. True indicates that there
+ * are pending messages on the ring (i.e., the connection should not be put
+ * to sleep).
+ *
+ * These macros will set the req_event/rsp_event field to trigger a
+ * notification on the very next message that is enqueued. If you want to
+ * create batches of work (i.e., only receive a notification after several
+ * messages have been enqueued) then you will need to create a customised
+ * version of the FINAL_CHECK macro in your own code, which sets the event
+ * field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->req_prod; \
+ RING_IDX __new = (_r)->req_prod_pvt; \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = __new; \
+ xen_mb(); /* back sees new requests /before/ we check req_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->rsp_prod; \
+ RING_IDX __new = (_r)->rsp_prod_pvt; \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = __new; \
+ xen_mb(); /* front sees new resps /before/ we check rsp_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+ if (_work_to_do) \
+ break; \
+ (_r)->sring->req_event = (_r)->req_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+ if (_work_to_do) \
+ break; \
+ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+} while (0)
+
+/*
+ * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
+ * functions to check if there is data on the ring, and to read and
+ * write to them.
+ *
+ * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
+ * does not define the indexes page. As different protocols can have
+ * extensions to the basic format, this macro allow them to define their
+ * own struct.
+ *
+ * XEN_FLEX_RING_SIZE
+ * Convenience macro to calculate the size of one of the two rings
+ * from the overall order.
+ *
+ * $NAME_mask
+ * Function to apply the size mask to an index, to reduce the index
+ * within the range [0-size].
+ *
+ * $NAME_read_packet
+ * Function to read data from the ring. The amount of data to read is
+ * specified by the "size" argument.
+ *
+ * $NAME_write_packet
+ * Function to write data to the ring. The amount of data to write is
+ * specified by the "size" argument.
+ *
+ * $NAME_get_ring_ptr
+ * Convenience function that returns a pointer to read/write to the
+ * ring at the right location.
+ *
+ * $NAME_data_intf
+ * Indexes page, shared between frontend and backend. It also
+ * contains the array of grant refs.
+ *
+ * $NAME_queued
+ * Function to calculate how many bytes are currently on the ring,
+ * ready to be read. It can also be used to calculate how much free
+ * space is currently on the ring (XEN_FLEX_RING_SIZE() -
+ * $NAME_queued()).
+ */
+
+#ifndef XEN_PAGE_SHIFT
+/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
+ * 4K, regardless of the architecture and the page granularity chosen by
+ * operating systems.
+ */
+#define XEN_PAGE_SHIFT 12
+#endif
+#define XEN_FLEX_RING_SIZE(order) \
+ (1UL << ((order) + XEN_PAGE_SHIFT - 1))
+
+#define DEFINE_XEN_FLEX_RING(name) \
+static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \
+{ \
+ return idx & (ring_size - 1); \
+} \
+ \
+static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \
+ RING_IDX idx, \
+ RING_IDX ring_size) \
+{ \
+ return buf + name##_mask(idx, ring_size); \
+} \
+ \
+static inline void name##_read_packet(void *opaque, \
+ const unsigned char *buf, \
+ size_t size, \
+ RING_IDX masked_prod, \
+ RING_IDX *masked_cons, \
+ RING_IDX ring_size) \
+{ \
+ if (*masked_cons < masked_prod || \
+ size <= ring_size - *masked_cons) { \
+ memcpy(opaque, buf + *masked_cons, size); \
+ } else { \
+ memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \
+ memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \
+ size - (ring_size - *masked_cons)); \
+ } \
+ *masked_cons = name##_mask(*masked_cons + size, ring_size); \
+} \
+ \
+static inline void name##_write_packet(unsigned char *buf, \
+ const void *opaque, \
+ size_t size, \
+ RING_IDX *masked_prod, \
+ RING_IDX masked_cons, \
+ RING_IDX ring_size) \
+{ \
+ if (*masked_prod < masked_cons || \
+ size <= ring_size - *masked_prod) { \
+ memcpy(buf + *masked_prod, opaque, size); \
+ } else { \
+ memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \
+ memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \
+ size - (ring_size - *masked_prod)); \
+ } \
+ *masked_prod = name##_mask(*masked_prod + size, ring_size); \
+} \
+ \
+static inline RING_IDX name##_queued(RING_IDX prod, \
+ RING_IDX cons, \
+ RING_IDX ring_size) \
+{ \
+ RING_IDX size; \
+ \
+ if (prod == cons) \
+ return 0; \
+ \
+ prod = name##_mask(prod, ring_size); \
+ cons = name##_mask(cons, ring_size); \
+ \
+ if (prod == cons) \
+ return ring_size; \
+ \
+ if (prod > cons) \
+ size = prod - cons; \
+ else \
+ size = ring_size - (cons - prod); \
+ return size; \
+} \
+ \
+struct name##_data { \
+ unsigned char *in; /* half of the allocation */ \
+ unsigned char *out; /* half of the allocation */ \
+}
+
+#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \
+struct name##_data_intf { \
+ RING_IDX in_cons, in_prod; \
+ \
+ u8 pad1[56]; \
+ \
+ RING_IDX out_cons, out_prod; \
+ \
+ u8 pad2[56]; \
+ \
+ RING_IDX ring_order; \
+ grant_ref_t ref[]; \
+}; \
+DEFINE_XEN_FLEX_RING(name)
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ */
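
Putting the hold-off macros above together, a front end typically pushes requests
and drains responses as in the following sketch (ring type 'blkif' from blkif.h;
'front_ring' and 'evtchn' are assumed to exist, and notify_remote_via_evtchn()
stands in for whatever event-channel notification helper is available):

    int notify, more;

    /* publish queued requests; notify only if the backend asked for it */
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front_ring, notify);
    if (notify)
        notify_remote_via_evtchn(evtchn);

    /* drain responses, re-checking once after re-arming rsp_event */
    do {
        while (RING_HAS_UNCONSUMED_RESPONSES(&front_ring)) {
            struct blkif_response *rsp =
                RING_GET_RESPONSE(&front_ring, front_ring.rsp_cons);
            /* handle rsp->id / rsp->status here */
            front_ring.rsp_cons++;
        }
        RING_FINAL_CHECK_FOR_RESPONSES(&front_ring, more);
    } while (more);
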
diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
new file mode 100644
index 00000000000..946d46ddb1b
--- /dev/null
+++ b/include/xen/interface/io/xenbus.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
+
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6,
+
+ /*
+ * Reconfiguring: The device is being reconfigured.
+ */
+ XenbusStateReconfiguring = 7,
+
+ XenbusStateReconfigured = 8
+};
+
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
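
The enum values above are published on XenBus as decimal strings in each end's
"state" node. A hypothetical sketch of a frontend announcing
XenbusStateConnected; xenbus_write() and XBT_NIL stand in for the XenStore
helpers added elsewhere in this series, and the node path is only an example:

    char val[4];

    snprintf(val, sizeof(val), "%d", XenbusStateConnected);
    xenbus_write(XBT_NIL, "device/vbd/51712/state", val);
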
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
new file mode 100644
index 00000000000..f20d9c51226
--- /dev/null
+++ b/include/xen/interface/io/xs_wire.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type {
+ XS_CONTROL,
+#define XS_DEBUG XS_CONTROL
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GET_DOMAIN_PATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+ XS_IS_DOMAIN_INTRODUCED,
+ XS_RESUME,
+ XS_SET_TARGET,
+ /* XS_RESTRICT has been removed */
+ XS_RESET_WATCHES = XS_SET_TARGET + 2,
+ XS_DIRECTORY_PART,
+
+ XS_TYPE_COUNT, /* Number of valid types. */
+
+ XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors {
+ int errnum;
+ const char *errstring;
+};
+
+#ifdef EINVAL
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+ = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(EAGAIN),
+ XSD_ERROR(EISCONN),
+ XSD_ERROR(E2BIG)
+};
+#endif
+
+struct xsd_sockmsg {
+ u32 type; /* XS_??? */
+ u32 req_id; /* Request identifier, echoed in daemon's response. */
+ u32 tx_id; /* Transaction id (0 if not related to a transaction). */
+ u32 len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+enum xs_watch_type {
+ XS_WATCH_PATH = 0,
+ XS_WATCH_TOKEN
+};
+
+/*
+ * `incontents 150 xenstore_struct XenStore wire protocol.
+ *
+ * Inter-domain shared memory communications.
+ */
+#define XENSTORE_RING_SIZE 1024
+typedef u32 XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1))
+struct xenstore_domain_interface {
+ char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+ char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+ XENSTORE_RING_IDX req_cons, req_prod;
+ XENSTORE_RING_IDX rsp_cons, rsp_prod;
+ u32 server_features; /* Bitmap of features supported by the server */
+ u32 connection;
+};
+
+/* Violating this is very bad. See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ */
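
Every XenStore request travels as a struct xsd_sockmsg header immediately
followed by msg.len bytes of payload. A sketch of composing an XS_READ of a
single path; copying the bytes into intf->req and signalling the store event
channel are left out, and 'path' is only an example:

    const char *path = "device/vbd/51712/state";
    struct xsd_sockmsg msg;

    msg.type = XS_READ;
    msg.req_id = 1;              /* echoed in the daemon's reply */
    msg.tx_id = 0;               /* not inside a transaction */
    msg.len = strlen(path) + 1;  /* payload: the nul-terminated path */
    /* copy 'msg', then 'path' (msg.len bytes), into intf->req at req_prod,
     * wrapping with MASK_XENSTORE_IDX(), then notify the store event channel */
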
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
new file mode 100644
index 00000000000..59a95dbc738
--- /dev/null
+++ b/include/xen/interface/memory.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns a
+ * -ve errcode on failure, or the # extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap 6
+struct xen_memory_reservation {
+ /*
+ * XENMEM_increase_reservation:
+ * OUT: MFN (*not* GMFN) bases of extents that were allocated
+ * XENMEM_decrease_reservation:
+ * IN: GMFN bases of extents to free
+ * XENMEM_populate_physmap:
+ * IN: GPFN bases of extents to populate with memory
+ * OUT: GMFN bases of extents that were allocated
+ * (NB. This command also updates the mach_to_phys translation table)
+ */
+ GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /* Number of extents, and size/alignment of each (2^extent_order pages). */
+ xen_ulong_t nr_extents;
+ unsigned int extent_order;
+
+ /*
+ * Maximum # bits addressable by the user of the allocated region (e.g.,
+ * I/O devices often have a 32-bit limitation even in 64-bit systems). If
+ * zero then the user has no addressing restriction.
+ * This field is not used by XENMEM_decrease_reservation.
+ */
+ unsigned int address_bits;
+
+ /*
+ * Domain whose reservation is being changed.
+ * Unprivileged domains can specify only DOMID_SELF.
+ */
+ domid_t domid;
+
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange 11
+struct xen_memory_exchange {
+ /*
+ * [IN] Details of memory extents to be exchanged (GMFN bases).
+ * Note that @in.address_bits is ignored and unused.
+ */
+ struct xen_memory_reservation in;
+
+ /*
+ * [IN/OUT] Details of new memory extents.
+ * We require that:
+ * 1. @in.domid == @out.domid
+ * 2. @in.nr_extents << @in.extent_order ==
+ * @out.nr_extents << @out.extent_order
+ * 3. @in.extent_start and @out.extent_start lists must not overlap
+ * 4. @out.extent_start lists GPFN bases to be populated
+ * 5. @out.extent_start is overwritten with allocated GMFN bases
+ */
+ struct xen_memory_reservation out;
+
+ /*
+ * [OUT] Number of input extents that were successfully exchanged:
+ * 1. The first @nr_exchanged input extents were successfully
+ * deallocated.
+ * 2. The corresponding first entries in the output extent list correctly
+ * indicate the GMFNs that were successfully exchanged.
+ * 3. All other input and output extents are untouched.
+ * 4. If not all input extents are exchanged then the return code of this
+ * command will be non-zero.
+ * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+ */
+ xen_ulong_t nr_exchanged;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page 2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation 3
+#define XENMEM_maximum_reservation 4
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list 5
+struct xen_machphys_mfn_list {
+ /*
+ * Size of the 'extent_start' array. Fewer entries will be filled if the
+ * machphys table is smaller than max_extents * 2MB.
+ */
+ unsigned int max_extents;
+
+ /*
+ * Pointer to buffer to fill with list of extent starts. If there are
+ * any large discontiguities in the machine address space, 2MB gaps in
+ * the machphys table will be represented by an MFN base of zero.
+ */
+ GUEST_HANDLE(xen_pfn_t)extent_start;
+
+ /*
+ * Number of extents written to the above array. This will be smaller
+ * than 'max_extents' if the machphys table is smaller than
+ * max_extents * 2MB.
+ */
+ unsigned int nr_extents;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+ xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
+ xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
+
+#define XENMAPSPACE_shared_info 0 /* shared info page */
+#define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_gmfn 2 /* GMFN */
+#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+ * XENMEM_add_to_physmap_range only.
+ */
+#define XENMAPSPACE_dev_mmio 5 /* device mmio region */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap 7
+struct xen_add_to_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* Number of pages to go through for gmfn_range */
+ u16 size;
+
+ /* Source mapping space. */
+ unsigned int space;
+
+ /* Index into source mapping space. */
+ xen_ulong_t idx;
+
+ /* GPFN where the source mapping page should appear. */
+ xen_pfn_t gpfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
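+
+/*
+ * Illustrative sketch only, not mandated by this header: assuming the
+ * guest has a HYPERVISOR_memory_op() hypercall wrapper, mapping the
+ * shared info page into its own physmap could look roughly like this
+ * ('page' being a guest page reserved for that purpose):
+ *
+ *	struct xen_add_to_physmap xatp = {
+ *		.domid = DOMID_SELF,
+ *		.space = XENMAPSPACE_shared_info,
+ *		.idx   = 0,
+ *		.gpfn  = (unsigned long)page >> PAGE_SHIFT,
+ *	};
+ *
+ *	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ *		panic("Failed to map shared_info page");
+ */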
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list 8*/
+
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+ /* IN */
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+ u16 space; /* => enum phys_map_space */
+
+ /* Number of pages to go through */
+ u16 size;
+ domid_t foreign_domid; /* IFF gmfn_foreign */
+
+ /* Indexes into space being mapped. */
+ GUEST_HANDLE(xen_ulong_t)idxs;
+
+ /* GPFN in domid where the source mapping page should appear. */
+ GUEST_HANDLE(xen_pfn_t)gpfns;
+
+ /* OUT */
+
+ /* Per index error code. */
+ GUEST_HANDLE(int)errs;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_memory_map 9
+struct xen_memory_map {
+ /*
+ * On call the number of entries which can be stored in buffer. On
+ * return the number of entries which have been stored in
+ * buffer.
+ */
+ unsigned int nr_entries;
+
+ /*
+ * Entries in the buffer are in the same format as returned by the
+ * BIOS INT 0x15 EAX=0xE820 call.
+ */
+ GUEST_HANDLE(void)buffer;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of struct xen_memory_map.
+ */
+#define XENMEM_machine_memory_map 10
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ xen_pfn_t gpfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
+/*
+ * Get the pages for a particular guest resource, so that they can be
+ * mapped directly by a tools domain.
+ */
+#define XENMEM_acquire_resource 28
+struct xen_mem_acquire_resource {
+ /* IN - The domain whose resource is to be mapped */
+ domid_t domid;
+ /* IN - the type of resource */
+ u16 type;
+
+#define XENMEM_resource_ioreq_server 0
+#define XENMEM_resource_grant_table 1
+
+ /*
+ * IN - a type-specific resource identifier, which must be zero
+ * unless stated otherwise.
+ *
+ * type == XENMEM_resource_ioreq_server -> id == ioreq server id
+ * type == XENMEM_resource_grant_table -> id defined below
+ */
+ u32 id;
+
+#define XENMEM_resource_grant_table_id_shared 0
+#define XENMEM_resource_grant_table_id_status 1
+
+ /* IN/OUT - As an IN parameter, the number of frames of the resource
+ * to be mapped. However, if the specified value is 0 and
+ * frame_list is NULL then this field will be set to the
+ * maximum value supported by the implementation on return.
+ */
+ u32 nr_frames;
+ /*
+ * OUT - Must be zero on entry. On return this may contain a bitwise
+ * OR of the following values.
+ */
+ u32 flags;
+
+ /* The resource pages have been assigned to the calling domain */
+#define _XENMEM_rsrc_acq_caller_owned 0
+#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned)
+
+ /*
+ * IN - the index of the initial frame to be mapped. This parameter
+ * is ignored if nr_frames is 0.
+ */
+ u64 frame;
+
+#define XENMEM_resource_ioreq_server_frame_bufioreq 0
+#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
+
+ /*
+ * IN/OUT - If the tools domain is PV then, upon return, frame_list
+ * will be populated with the MFNs of the resource.
+ * If the tools domain is HVM then it is expected that, on
+ * entry, frame_list will be populated with a list of GFNs
+ * that will be mapped to the MFNs of the resource.
+ * If -EIO is returned then the frame_list has only been
+ * partially mapped and it is up to the caller to unmap all
+ * the GFNs.
+ * This parameter may be NULL if nr_frames is 0.
+ */
+ GUEST_HANDLE(xen_pfn_t)frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource);
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
new file mode 100644
index 00000000000..387589be495
--- /dev/null
+++ b/include/xen/interface/sched.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include <xen/interface/event_channel.h>
+
+/*
+ * Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
+ * The prototype for this hypercall is:
+ * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * ... == Additional Operation-specific extra arguments, described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ * long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
+ * == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
+ */
+
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield 0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block 1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page. RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, when the reason is SHUTDOWN_suspend, this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown 2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll structure.
+ */
+#define SCHEDOP_poll 3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown 4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ * after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog 6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu or
+ * undo this override restoring the previous affinity.
+ * @arg == pointer to sched_pin_override structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+
+struct sched_shutdown {
+ unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
+
+struct sched_poll {
+ GUEST_HANDLE(evtchn_port_t)ports;
+ unsigned int nr_ports;
+ u64 timeout;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
+
+struct sched_remote_shutdown {
+ domid_t domain_id; /* Remote domain ID */
+ unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
+
+struct sched_watchdog {
+ u32 id; /* watchdog ID */
+ u32 timeout; /* timeout */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
+
+struct sched_pin_override {
+ s32 pcpu;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
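+
+/*
+ * Illustrative sketch only: assuming the usual HYPERVISOR_sched_op()
+ * wrapper and the set_xen_guest_handle() helper from the arch interface
+ * header, yielding the CPU and polling a single event channel could
+ * look roughly like this:
+ *
+ *	HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+ *
+ *	evtchn_port_t port = ...;	(port the guest wants to wait on)
+ *	struct sched_poll poll = {
+ *		.nr_ports = 1,
+ *		.timeout = 0,		(no timeout set here)
+ *	};
+ *	set_xen_guest_handle(poll.ports, &port);
+ *	HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
+ */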
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
+#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
+#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
+#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
+
+/*
+ * The domain has asked to perform a 'soft reset'. The expected behavior is to
+ * reset Xen's internal state for the domain, returning it to the point where
+ * it was created, but leaving the domain's memory contents and vCPU contexts
+ * intact. This allows the domain to start over and set up all Xen-specific
+ * interfaces again.
+ */
+#define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
new file mode 100644
index 00000000000..eec8ab75b9c
--- /dev/null
+++ b/include/xen/interface/xen.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include <xen/arm/interface.h>
+
+/*
+ * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
+ */
+
+/*
+ * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
+ * EAX = return value
+ * (argument registers may be clobbered on return)
+ * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
+ * RAX = return value
+ * (argument registers not clobbered on return; RCX, R11 are)
+ */
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_mmu_update 1
+#define __HYPERVISOR_set_gdt 2
+#define __HYPERVISOR_stack_switch 3
+#define __HYPERVISOR_set_callbacks 4
+#define __HYPERVISOR_fpu_taskswitch 5
+#define __HYPERVISOR_sched_op_compat 6
+#define __HYPERVISOR_platform_op 7
+#define __HYPERVISOR_set_debugreg 8
+#define __HYPERVISOR_get_debugreg 9
+#define __HYPERVISOR_update_descriptor 10
+#define __HYPERVISOR_memory_op 12
+#define __HYPERVISOR_multicall 13
+#define __HYPERVISOR_update_va_mapping 14
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_event_channel_op_compat 16
+#define __HYPERVISOR_xen_version 17
+#define __HYPERVISOR_console_io 18
+#define __HYPERVISOR_physdev_op_compat 19
+#define __HYPERVISOR_grant_table_op 20
+#define __HYPERVISOR_vm_assist 21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_xsm_op 27
+#define __HYPERVISOR_nmi_op 28
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_callback_op 30
+#define __HYPERVISOR_xenoprof_op 31
+#define __HYPERVISOR_event_channel_op 32
+#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_hvm_op 34
+#define __HYPERVISOR_sysctl 35
+#define __HYPERVISOR_domctl 36
+#define __HYPERVISOR_kexec_op 37
+#define __HYPERVISOR_tmem_op 38
+#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
+#define __HYPERVISOR_xenpmu_op 40
+#define __HYPERVISOR_dm_op 41
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0 48
+#define __HYPERVISOR_arch_1 49
+#define __HYPERVISOR_arch_2 50
+#define __HYPERVISOR_arch_3 51
+#define __HYPERVISOR_arch_4 52
+#define __HYPERVISOR_arch_5 53
+#define __HYPERVISOR_arch_6 54
+#define __HYPERVISOR_arch_7 55
+
+#ifndef __ASSEMBLY__
+
+typedef u16 domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN (0x7FF2U)
+
+/* DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW (0x7FF3U)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID (0x7FF4U)
+
+/* Idle domain. */
+#define DOMID_IDLE (0x7FFFU)
+
+struct vcpu_info {
+ /*
+ * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+ * a pending notification for a particular VCPU. It is then cleared
+ * by the guest OS /before/ checking for pending work, thus avoiding
+ * a set-and-check race. Note that the mask is only accessed by Xen
+ * on the CPU that is currently hosting the VCPU. This means that the
+ * pending and mask flags can be updated by the guest without special
+ * synchronisation (i.e., no need for the x86 LOCK prefix).
+ * This may seem suboptimal because if the pending flag is set by
+ * a different CPU then an IPI may be scheduled even when the mask
+ * is set. However, note:
+ * 1. The task of 'interrupt holdoff' is covered by the per-event-
+ * channel mask bits. A 'noisy' event that is continually being
+ * triggered can be masked at source at this very precise
+ * granularity.
+ * 2. The main purpose of the per-VCPU mask is therefore to restrict
+ * reentrant execution: whether for concurrency control, or to
+ * prevent unbounded stack usage. Whatever the purpose, we expect
+ * that the mask will be asserted only for short periods at a time,
+ * and so the likelihood of a 'spurious' IPI is suitably small.
+ * The mask is read before making an event upcall to the guest: a
+ * non-zero mask therefore guarantees that the VCPU will not receive
+ * an upcall activation. The mask is cleared when the VCPU requests
+ * to block: this avoids wakeup-waiting races.
+ */
+ u8 evtchn_upcall_pending;
+ u8 evtchn_upcall_mask;
+ xen_ulong_t evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ * NB. We expect that this struct is smaller than a page.
+ */
+struct shared_info {
+ struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+
+ /*
+ * A domain can create "event channels" on which it can send and receive
+ * asynchronous event notifications. There are three classes of event that
+ * are delivered by this mechanism:
+ * 1. Bi-directional inter- and intra-domain connections. Domains must
+ * arrange out-of-band to set up a connection (usually by allocating
+ * an unbound 'listener' port and advertising that via a storage service
+ * such as xenstore).
+ * 2. Physical interrupts. A domain with suitable hardware-access
+ * privileges can bind an event-channel port to a physical interrupt
+ * source.
+ * 3. Virtual interrupts ('events'). A domain can bind an event-channel
+ * port to a virtual interrupt source, such as the virtual-timer
+ * device or the emergency console.
+ *
+ * Event channels are addressed by a "port index". Each channel is
+ * associated with two bits of information:
+ * 1. PENDING -- notifies the domain that there is a pending notification
+ * to be processed. This bit is cleared by the guest.
+ * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+ * will cause an asynchronous upcall to be scheduled. This bit is only
+ * updated by the guest. It is read-only within Xen. If a channel
+ * becomes pending while the channel is masked then the 'edge' is lost
+ * (i.e., when the channel is unmasked, the guest must manually handle
+ * pending notifications as no upcall will be scheduled by Xen).
+ *
+ * To expedite scanning of pending notifications, any 0->1 pending
+ * transition on an unmasked channel causes a corresponding bit in a
+ * per-vcpu selector word to be set. Each bit in the selector covers a
+ * 'C long' in the PENDING bitfield array.
+ */
+ xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+ xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+ /*
+ * Wallclock time: updated only by control software. Guests should base
+ * their gettimeofday() syscall on this wallclock-base value.
+ */
+ struct pvclock_wall_clock wc;
+
+ struct arch_shared_info arch;
+
+};
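+
+/*
+ * Illustrative sketch only: a single-VCPU guest could scan for pending
+ * events roughly as follows, using the per-VCPU selector word to limit
+ * the scan. 'HYPERVISOR_shared_info' is assumed to point at the mapped
+ * shared page and handle_port() stands for whatever dispatch the guest
+ * uses; masking and atomicity details are omitted.
+ *
+ *	struct shared_info *s = HYPERVISOR_shared_info;
+ *	struct vcpu_info *v = &s->vcpu_info[0];
+ *	xen_ulong_t sel = v->evtchn_pending_sel, pending;
+ *	unsigned int w, bit, bits = sizeof(xen_ulong_t) * 8;
+ *
+ *	v->evtchn_pending_sel = 0;
+ *	for (w = 0; w < bits; w++) {
+ *		if (!(sel & ((xen_ulong_t)1 << w)))
+ *			continue;
+ *		pending = s->evtchn_pending[w] & ~s->evtchn_mask[w];
+ *		for (bit = 0; bit < bits; bit++)
+ *			if (pending & ((xen_ulong_t)1 << bit))
+ *				handle_port(w * bits + bit);
+ *	}
+ */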
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
new file mode 100644
index 00000000000..3ed7fd57333
--- /dev/null
+++ b/include/xen/xenbus.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef XENBUS_H__
+#define XENBUS_H__
+
+#include <xen/interface/xen.h>
+#include <xen/interface/io/xenbus.h>
+
+typedef unsigned long xenbus_transaction_t;
+#define XBT_NIL ((xenbus_transaction_t)0)
+
+extern u32 xenbus_evtchn;
+
+/* Initialize the XenBus system. */
+void init_xenbus(void);
+/* Finalize the XenBus system. */
+void fini_xenbus(void);
+
+/**
+ * xenbus_read() - Read the value associated with a path.
+ *
+ * Returns a malloc'd error string on failure and sets *value to NULL.
+ * On success, *value is set to a malloc'd copy of the value.
+ */
+char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value);
+
+char *xenbus_wait_for_state_change(const char *path, XenbusState *state);
+char *xenbus_switch_state(xenbus_transaction_t xbt, const char *path,
+ XenbusState state);
+
+/**
+ * xenbus_write() - Associates a value with a path.
+ *
+ * Returns a malloc'd error string on failure.
+ */
+char *xenbus_write(xenbus_transaction_t xbt, const char *path,
+ const char *value);
+
+/**
+ * xenbus_rm() - Removes the value associated with a path.
+ *
+ * Returns a malloc'd error string on failure.
+ */
+char *xenbus_rm(xenbus_transaction_t xbt, const char *path);
+
+/**
+ * xenbus_ls() - List the contents of a directory.
+ *
+ * Returns a malloc'd error string on failure and sets *contents to NULL.
+ * On success, *contents is set to a malloc'd array of pointers to malloc'd
+ * strings. The array is NULL terminated. May block.
+ */
+char *xenbus_ls(xenbus_transaction_t xbt, const char *prefix, char ***contents);
+
+/**
+ * xenbus_get_perms() - Reads permissions associated with a path.
+ *
+ * Returns a malloc'd error string on failure and sets *value to NULL.
+ * On success, *value is set to a malloc'd copy of the value.
+ */
+char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value);
+
+/**
+ * xenbus_set_perms() - Sets the permissions associated with a path.
+ *
+ * Returns a malloc'd error string on failure.
+ */
+char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom,
+ char perm);
+
+/**
+ * xenbus_transaction_start() - Start a xenbus transaction.
+ *
+ * Returns the transaction in xbt on success or a malloc'd error string
+ * otherwise.
+ */
+char *xenbus_transaction_start(xenbus_transaction_t *xbt);
+
+/**
+ * xenbus_transaction_end() - End a xenbus transaction.
+ *
+ * Returns a malloc'd error string if it fails. Abort says whether the
+ * transaction should be aborted.
+ * Returns 1 in *retry if the transaction should be retried.
+ */
+char *xenbus_transaction_end(xenbus_transaction_t xbt, int abort,
+ int *retry);
+
+/**
+ * xenbus_read_integer() - Read path and parse it as an integer.
+ *
+ * Returns -1 on error.
+ */
+int xenbus_read_integer(const char *path);
+
+/**
+ * xenbus_read_uuid() - Read path and parse it as 16 byte uuid.
+ *
+ * Returns 1 if read and parsing were successful, 0 if not
+ */
+int xenbus_read_uuid(const char *path, unsigned char uuid[16]);
+
+/**
+ * xenbus_printf() - Contraction of snprintf and xenbus_write(path/node).
+ */
+char *xenbus_printf(xenbus_transaction_t xbt,
+ const char *node, const char *path,
+ const char *fmt, ...)
+ __attribute__((__format__(printf, 4, 5)));
+
+/**
+ * xenbus_get_self_id() - Utility function to figure out our domain id
+ */
+domid_t xenbus_get_self_id(void);
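+
+/*
+ * Illustrative sketch only: a typical transactional read built on the
+ * calls above, retrying when xenstore asks for it. The path is just an
+ * example; every non-NULL error string (and *value on success) is
+ * malloc'd and must be freed by the caller.
+ *
+ *	xenbus_transaction_t xbt;
+ *	char *err, *value = NULL;
+ *	int retry = 0;
+ *
+ *	do {
+ *		err = xenbus_transaction_start(&xbt);
+ *		if (err) {
+ *			free(err);
+ *			break;
+ *		}
+ *		err = xenbus_read(xbt, "device/vbd/51712/state", &value);
+ *		free(err);
+ *		err = xenbus_transaction_end(xbt, 0, &retry);
+ *		free(err);
+ *	} while (retry);
+ */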
+
+#endif /* XENBUS_H__ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 089348af739..8efb154f734 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -80,6 +80,10 @@ config SPL_SPRINTF
config TPL_SPRINTF
bool
+config SSCANF
+ bool
+ default n
+
config STRTO
bool
default y
diff --git a/lib/Makefile b/lib/Makefile
index 1dc06c57d5f..0cd7bea2823 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -123,6 +123,7 @@ else
# Main U-Boot always uses the full printf support
obj-y += vsprintf.o strto.o
obj-$(CONFIG_OID_REGISTRY) += oid_registry.o
+obj-$(CONFIG_SSCANF) += sscanf.o
endif
obj-y += date.o
diff --git a/lib/sscanf.c b/lib/sscanf.c
new file mode 100644
index 00000000000..d1e2dc272cc
--- /dev/null
+++ b/lib/sscanf.c
@@ -0,0 +1,823 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Copyright (c) 2011 The FreeBSD Foundation
+ * All rights reserved.
+ * Portions of this software were developed by David Chisnall
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Author: Juergen Gross <jgross@suse.com>
+ * Date: Jun 2016
+ */
+
+#if !defined HAVE_LIBC
+
+#include <os.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <vsprintf.h>
+#include <linux/string.h>
+#include <malloc.h>
+#define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
+
+/**
+ * struct str_info - Input string parameters
+ * @neg: negative number or not
+ * 0 - not negative
+ * 1 - negative
+ * @any: set any if any `digits' consumed; make it negative to indicate
+ * overflow
+ * @acc: accumulated value
+ */
+struct str_info {
+ int neg, any;
+ u64 acc;
+};
+
+/**
+ * str_to_int_convert() - Write string data to structure
+ * @nptr: pointer to string
+ * @base: number's base
+ * @unsign: describes what integer is expected
+ * 0 - not unsigned
+ * 1 - unsigned
+ *
+ * Ignores `locale' stuff. Assumes that the upper and lower case
+ * alphabets and digits are each contiguous.
+ *
+ * Return: struct str_info *, which contains string data to future process
+ */
+static struct str_info *
+str_to_int_convert(const char **nptr, int base, unsigned int unsign)
+{
+ const char *s = *nptr;
+ u64 acc;
+ unsigned char c;
+ u64 cutoff;
+ int neg, any, cutlim;
+ u64 qbase;
+ struct str_info *info;
+
+ /*
+ * Skip white space and pick up leading +/- sign if any.
+ * If base is 0, allow 0x for hex and 0 for octal, else
+ * assume decimal; if base is already 16, allow 0x.
+ */
+ info = (struct str_info *)malloc(sizeof(struct str_info));
+ if (!info)
+ return NULL;
+
+ do {
+ c = *s++;
+ } while (isspace(c));
+ if (c == '-') {
+ neg = 1;
+ c = *s++;
+ } else {
+ neg = 0;
+ if (c == '+')
+ c = *s++;
+ }
+ if ((base == 0 || base == 16) &&
+ c == '0' && (*s == 'x' || *s == 'X')) {
+ c = s[1];
+ s += 2;
+ base = 16;
+ }
+ if (base == 0)
+ base = c == '0' ? 8 : 10;
+
+ /*
+ * Compute the cutoff value between legal numbers and illegal
+ * numbers. That is the largest legal value, divided by the
+ * base. An input number that is greater than this value, if
+ * followed by a legal input character, is too big. One that
+ * is equal to this value may be valid or not; the limit
+ * between valid and invalid numbers is then based on the last
+ * digit. For instance, if the range for quads is
+ * [-9223372036854775808..9223372036854775807] and the input base
+ * is 10, cutoff will be set to 922337203685477580 and cutlim to
+ * either 7 (neg==0) or 8 (neg==1), meaning that if we have
+ * accumulated a value > 922337203685477580, or equal but the
+ * next digit is > 7 (or 8), the number is too big, and we will
+ * return a range error.
+ *
+ * Set any if any `digits' consumed; make it negative to indicate
+ * overflow.
+ */
+ qbase = (unsigned int)base;
+
+ if (!unsign) {
+ cutoff = neg ? (u64)-(LLONG_MIN + LLONG_MAX) + LLONG_MAX : LLONG_MAX;
+ cutlim = cutoff % qbase;
+ cutoff /= qbase;
+ } else {
+ cutoff = (u64)ULLONG_MAX / qbase;
+ cutlim = (u64)ULLONG_MAX % qbase;
+ }
+
+ for (acc = 0, any = 0;; c = *s++) {
+ if (!isascii(c))
+ break;
+ if (isdigit(c))
+ c -= '0';
+ else if (isalpha(c))
+ c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ else
+ break;
+ if (c >= base)
+ break;
+ if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) {
+ any = -1;
+ } else {
+ any = 1;
+ acc *= qbase;
+ acc += c;
+ }
+ }
+
+ info->any = any;
+ info->neg = neg;
+ info->acc = acc;
+
+ *nptr = s;
+
+ return info;
+}
+
+/**
+ * strtoq() - Convert a string to a quad integer
+ * @nptr: pointer to string
+ * @endptr: pointer to number's end in the string
+ * @base: number's base
+ *
+ * Return: s64 quad integer number converted from input string
+ */
+static s64
+strtoq(const char *nptr, char **endptr, int base)
+{
+ const char *s = nptr;
+ u64 acc;
+ int unsign = 0;
+ struct str_info *info;
+
+ info = str_to_int_convert(&s, base, unsign);
+ if (!info)
+ return -1;
+
+ acc = info->acc;
+
+ if (info->any < 0)
+ acc = info->neg ? LLONG_MIN : LLONG_MAX;
+ else if (info->neg)
+ acc = -acc;
+ if (endptr != 0)
+ *endptr = __DECONST(char *, info->any ? s - 1 : nptr);
+
+ free(info);
+
+ return acc;
+}
+
+/**
+ * strtouq() - Convert a string to an unsigned quad integer
+ * @nptr: pointer to string
+ * @endptr: pointer to number's end in the string
+ * @base: number's base
+ *
+ * Return: u64 unsigned quad integer number converted from
+ * input string
+ */
+u64
+strtouq(const char *nptr, char **endptr, int base)
+{
+ const char *s = nptr;
+ u64 acc;
+ int unsign = 1;
+ struct str_info *info;
+
+ info = str_to_int_convert(&s, base, unsign);
+ if (!info)
+ return -1;
+
+ acc = info->acc;
+
+ if (info->any < 0)
+ acc = ULLONG_MAX;
+ else if (info->neg)
+ acc = -acc;
+ if (endptr != 0)
+ *endptr = __DECONST(char *, info->any ? s - 1 : nptr);
+
+ free(info);
+
+ return acc;
+}
+
+/**
+ * __sccl() - Fill in the given table from the scanset at the given format
+ * (just after `[')
+ * @tab: table to fill in
+ * @fmt: format of buffer
+ *
+ * The table has a 1 wherever characters should be considered part of the
+ * scanset.
+ *
+ * Return: pointer to the character past the closing `]'
+ */
+static const u_char *
+__sccl(char *tab, const u_char *fmt)
+{
+ int c, n, v;
+
+ /* first `clear' the whole table */
+ c = *fmt++; /* first char hat => negated scanset */
+ if (c == '^') {
+ v = 1; /* default => accept */
+ c = *fmt++; /* get new first char */
+ } else {
+ v = 0; /* default => reject */
+ }
+
+ /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
+ for (n = 0; n < 256; n++)
+ tab[n] = v; /* memset(tab, v, 256) */
+
+ if (c == 0)
+ return (fmt - 1);/* format ended before closing ] */
+
+ /*
+ * Now set the entries corresponding to the actual scanset
+ * to the opposite of the above.
+ *
+ * The first character may be ']' (or '-') without being special;
+ * the last character may be '-'.
+ */
+ v = 1 - v;
+ for (;;) {
+ tab[c] = v; /* take character c */
+doswitch:
+ n = *fmt++; /* and examine the next */
+ switch (n) {
+ case 0: /* format ended too soon */
+ return (fmt - 1);
+
+ case '-':
+ /*
+ * A scanset of the form
+ * [01+-]
+ * is defined as `the digit 0, the digit 1,
+ * the character +, the character -', but
+ * the effect of a scanset such as
+ * [a-zA-Z0-9]
+ * is implementation defined. The V7 Unix
+ * scanf treats `a-z' as `the letters a through
+ * z', but treats `a-a' as `the letter a, the
+ * character -, and the letter a'.
+ *
+ * For compatibility, the `-' is not considered
+ * to define a range if the character following
+ * it is either a close bracket (required by ANSI)
+ * or is not numerically greater than the character
+ * we just stored in the table (c).
+ */
+ n = *fmt;
+ if (n == ']' || n < c) {
+ c = '-';
+ break; /* resume the for(;;) */
+ }
+ fmt++;
+ /* fill in the range */
+ do {
+ tab[++c] = v;
+ } while (c < n);
+ c = n;
+ /*
+ * Alas, the V7 Unix scanf also treats formats
+ * such as [a-c-e] as `the letters a through e'.
+ * This too is permitted by the standard....
+ */
+ goto doswitch;
+ break;
+
+ case ']': /* end of scanset */
+ return (fmt);
+
+ default: /* just another character */
+ c = n;
+ break;
+ }
+ }
+ /* NOTREACHED */
+}
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+#define BUF 32 /* Maximum length of numeric string. */
+
+/*
+ * Flags used during conversion.
+ */
+#define LONG 0x01 /* l: long or double */
+#define SHORT 0x04 /* h: short */
+#define SUPPRESS 0x08 /* suppress assignment */
+#define POINTER 0x10 /* weird %p pointer (`fake hex') */
+#define NOSKIP 0x20 /* do not skip blanks */
+#define QUAD 0x400
+#define SHORTSHORT 0x4000 /* hh: char */
+
+/*
+ * The following are used in numeric conversions only:
+ * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
+ * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
+ */
+#define SIGNOK 0x40 /* +/- is (still) legal */
+#define NDIGITS 0x80 /* no digits detected */
+
+#define DPTOK 0x100 /* (float) decimal point is still legal */
+#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
+
+#define PFXOK 0x100 /* 0x prefix is (still) legal */
+#define NZDIGITS 0x200 /* no zero digits detected */
+
+/*
+ * Conversion types.
+ */
+#define CT_CHAR 0 /* %c conversion */
+#define CT_CCL 1 /* %[...] conversion */
+#define CT_STRING 2 /* %s conversion */
+#define CT_INT 3 /* integer, i.e., strtoq or strtouq */
+typedef u64 (*ccfntype)(const char *, char **, int);
+
+int
+vsscanf(const char *inp, char const *fmt0, va_list ap)
+{
+ int inr;
+ const u_char *fmt = (const u_char *)fmt0;
+ int c; /* character from format, or conversion */
+ size_t width; /* field width, or 0 */
+ char *p; /* points into all kinds of strings */
+ int n; /* handy integer */
+ int flags; /* flags as defined above */
+ char *p0; /* saves original value of p when necessary */
+ int nassigned; /* number of fields assigned */
+ int nconversions; /* number of conversions */
+ int nread; /* number of characters consumed from fp */
+ int base; /* base argument to strtoq/strtouq */
+ ccfntype ccfn; /* conversion function (strtoq/strtouq) */
+ char ccltab[256]; /* character class table for %[...] */
+ char buf[BUF]; /* buffer for numeric conversions */
+
+ /* `basefix' is used to avoid `if' tests in the integer scanner */
+ static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16 };
+
+ inr = strlen(inp);
+
+ nassigned = 0;
+ nconversions = 0;
+ nread = 0;
+ base = 0; /* XXX just to keep gcc happy */
+ ccfn = NULL; /* XXX just to keep gcc happy */
+ for (;;) {
+ c = *fmt++;
+ if (c == 0)
+ return (nassigned);
+ if (isspace(c)) {
+ while (inr > 0 && isspace(*inp))
+ nread++, inr--, inp++;
+ continue;
+ }
+ if (c != '%')
+ goto literal;
+ width = 0;
+ flags = 0;
+ /*
+ * switch on the format. continue if done;
+ * break once format type is derived.
+ */
+again: c = *fmt++;
+ switch (c) {
+ case '%':
+literal:
+ if (inr <= 0)
+ goto input_failure;
+ if (*inp != c)
+ goto match_failure;
+ inr--, inp++;
+ nread++;
+ continue;
+
+ case '*':
+ flags |= SUPPRESS;
+ goto again;
+ case 'l':
+ if (flags & LONG) {
+ flags &= ~LONG;
+ flags |= QUAD;
+ } else {
+ flags |= LONG;
+ }
+ goto again;
+ case 'q':
+ flags |= QUAD;
+ goto again;
+ case 'h':
+ if (flags & SHORT) {
+ flags &= ~SHORT;
+ flags |= SHORTSHORT;
+ } else {
+ flags |= SHORT;
+ }
+ goto again;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ width = width * 10 + c - '0';
+ goto again;
+
+ /*
+ * Conversions.
+ *
+ */
+ case 'd':
+ c = CT_INT;
+ ccfn = (ccfntype)strtoq;
+ base = 10;
+ break;
+
+ case 'i':
+ c = CT_INT;
+ ccfn = (ccfntype)strtoq;
+ base = 0;
+ break;
+
+ case 'o':
+ c = CT_INT;
+ ccfn = strtouq;
+ base = 8;
+ break;
+
+ case 'u':
+ c = CT_INT;
+ ccfn = strtouq;
+ base = 10;
+ break;
+
+ case 'x':
+ flags |= PFXOK; /* enable 0x prefixing */
+ c = CT_INT;
+ ccfn = strtouq;
+ base = 16;
+ break;
+
+ case 's':
+ c = CT_STRING;
+ break;
+
+ case '[':
+ fmt = __sccl(ccltab, fmt);
+ flags |= NOSKIP;
+ c = CT_CCL;
+ break;
+
+ case 'c':
+ flags |= NOSKIP;
+ c = CT_CHAR;
+ break;
+
+ case 'p': /* pointer format is like hex */
+ flags |= POINTER | PFXOK;
+ c = CT_INT;
+ ccfn = strtouq;
+ base = 16;
+ break;
+
+ case 'n':
+ nconversions++;
+ if (flags & SUPPRESS) /* ??? */
+ continue;
+ if (flags & SHORTSHORT)
+ *va_arg(ap, char *) = nread;
+ else if (flags & SHORT)
+ *va_arg(ap, short *) = nread;
+ else if (flags & LONG)
+ *va_arg(ap, long *) = nread;
+ else if (flags & QUAD)
+ *va_arg(ap, s64 *) = nread;
+ else
+ *va_arg(ap, int *) = nread;
+ continue;
+ }
+
+ /*
+ * We have a conversion that requires input.
+ */
+ if (inr <= 0)
+ goto input_failure;
+
+ /*
+ * Consume leading white space, except for formats
+ * that suppress this.
+ */
+ if ((flags & NOSKIP) == 0) {
+ while (isspace(*inp)) {
+ nread++;
+ if (--inr > 0)
+ inp++;
+ else
+ goto input_failure;
+ }
+ /*
+ * Note that there is at least one character in
+ * the buffer, so conversions that do not set NOSKIP
+ * can no longer result in an input failure.
+ */
+ }
+
+ /*
+ * Do the conversion.
+ */
+ switch (c) {
+ case CT_CHAR:
+ /* scan arbitrary characters (sets NOSKIP) */
+ if (width == 0)
+ width = 1;
+ if (flags & SUPPRESS) {
+ size_t sum = 0;
+
+ if ((n = inr) < width) {
+ sum += n;
+ width -= n;
+ inp += n;
+ if (sum == 0)
+ goto input_failure;
+ } else {
+ sum += width;
+ inr -= width;
+ inp += width;
+ }
+ nread += sum;
+ } else {
+ memcpy(va_arg(ap, char *), inp, width);
+ inr -= width;
+ inp += width;
+ nread += width;
+ nassigned++;
+ }
+ nconversions++;
+ break;
+
+ case CT_CCL:
+ /* scan a (nonempty) character class (sets NOSKIP) */
+ if (width == 0)
+ width = (size_t)~0; /* `infinity' */
+ /* take only those things in the class */
+ if (flags & SUPPRESS) {
+ n = 0;
+ while (ccltab[(unsigned char)*inp]) {
+ n++, inr--, inp++;
+ if (--width == 0)
+ break;
+ if (inr <= 0) {
+ if (n == 0)
+ goto input_failure;
+ break;
+ }
+ }
+ if (n == 0)
+ goto match_failure;
+ } else {
+ p = va_arg(ap, char *);
+ p0 = p;
+ while (ccltab[(unsigned char)*inp]) {
+ inr--;
+ *p++ = *inp++;
+ if (--width == 0)
+ break;
+ if (inr <= 0) {
+ if (p == p0)
+ goto input_failure;
+ break;
+ }
+ }
+ n = p - p0;
+ if (n == 0)
+ goto match_failure;
+ *p = 0;
+ nassigned++;
+ }
+ nread += n;
+ nconversions++;
+ break;
+
+ case CT_STRING:
+ /* like CCL, but zero-length string OK, & no NOSKIP */
+ if (width == 0)
+ width = (size_t)~0;
+ if (flags & SUPPRESS) {
+ n = 0;
+ while (!isspace(*inp)) {
+ n++, inr--, inp++;
+ if (--width == 0)
+ break;
+ if (inr <= 0)
+ break;
+ }
+ nread += n;
+ } else {
+ p = va_arg(ap, char *);
+ p0 = p;
+ while (!isspace(*inp)) {
+ inr--;
+ *p++ = *inp++;
+ if (--width == 0)
+ break;
+ if (inr <= 0)
+ break;
+ }
+ *p = 0;
+ nread += p - p0;
+ nassigned++;
+ }
+ nconversions++;
+ continue;
+
+ case CT_INT:
+ /* scan an integer as if by strtoq/strtouq */
+#ifdef hardway
+ if (width == 0 || width > sizeof(buf) - 1)
+ width = sizeof(buf) - 1;
+#else
+ /* size_t is unsigned, hence this optimisation */
+ if (--width > sizeof(buf) - 2)
+ width = sizeof(buf) - 2;
+ width++;
+#endif
+ flags |= SIGNOK | NDIGITS | NZDIGITS;
+ for (p = buf; width; width--) {
+ c = *inp;
+ /*
+ * Switch on the character; `goto ok'
+ * if we accept it as a part of number.
+ */
+ switch (c) {
+ /*
+ * The digit 0 is always legal, but is
+ * special. For %i conversions, if no
+ * digits (zero or nonzero) have been
+ * scanned (only signs), we will have
+ * base==0. In that case, we should set
+ * it to 8 and enable 0x prefixing.
+ * Also, if we have not scanned zero digits
+ * before this, do not turn off prefixing
+ * (someone else will turn it off if we
+ * have scanned any nonzero digits).
+ */
+ case '0':
+ if (base == 0) {
+ base = 8;
+ flags |= PFXOK;
+ }
+ if (flags & NZDIGITS)
+ flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
+ else
+ flags &= ~(SIGNOK | PFXOK | NDIGITS);
+ goto ok;
+
+ /* 1 through 7 always legal */
+ case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ base = basefix[base];
+ flags &= ~(SIGNOK | PFXOK | NDIGITS);
+ goto ok;
+
+ /* digits 8 and 9 ok iff decimal or hex */
+ case '8': case '9':
+ base = basefix[base];
+ if (base <= 8)
+ break; /* not legal here */
+ flags &= ~(SIGNOK | PFXOK | NDIGITS);
+ goto ok;
+
+ /* letters ok iff hex */
+ case 'A': case 'B': case 'C':
+ case 'D': case 'E': case 'F':
+ case 'a': case 'b': case 'c':
+ case 'd': case 'e': case 'f':
+ /* no need to fix base here */
+ if (base <= 10)
+ break; /* not legal here */
+ flags &= ~(SIGNOK | PFXOK | NDIGITS);
+ goto ok;
+
+ /* sign ok only as first character */
+ case '+': case '-':
+ if (flags & SIGNOK) {
+ flags &= ~SIGNOK;
+ goto ok;
+ }
+ break;
+
+ /* x ok iff flag still set & 2nd char */
+ case 'x': case 'X':
+ if (flags & PFXOK && p == buf + 1) {
+ base = 16; /* if %i */
+ flags &= ~PFXOK;
+ goto ok;
+ }
+ break;
+ }
+
+ /*
+ * If we got here, c is not a legal character
+ * for a number. Stop accumulating digits.
+ */
+ break;
+ok:
+ /*
+ * c is legal: store it and look at the next.
+ */
+ *p++ = c;
+ if (--inr > 0)
+ inp++;
+ else
+ break; /* end of input */
+ }
+ /*
+ * If we had only a sign, it is no good; push
+ * back the sign. If the number ends in `x',
+ * it was [sign] '0' 'x', so push back the x
+ * and treat it as [sign] '0'.
+ */
+ if (flags & NDIGITS) {
+ if (p > buf) {
+ inp--;
+ inr++;
+ }
+ goto match_failure;
+ }
+ c = ((u_char *)p)[-1];
+ if (c == 'x' || c == 'X') {
+ --p;
+ inp--;
+ inr++;
+ }
+ if ((flags & SUPPRESS) == 0) {
+ u64 res;
+
+ *p = 0;
+ res = (*ccfn)(buf, (char **)NULL, base);
+ if (flags & POINTER)
+ *va_arg(ap, void **) =
+ (void *)(uintptr_t)res;
+ else if (flags & SHORTSHORT)
+ *va_arg(ap, char *) = res;
+ else if (flags & SHORT)
+ *va_arg(ap, short *) = res;
+ else if (flags & LONG)
+ *va_arg(ap, long *) = res;
+ else if (flags & QUAD)
+ *va_arg(ap, s64 *) = res;
+ else
+ *va_arg(ap, int *) = res;
+ nassigned++;
+ }
+ nread += p - buf;
+ nconversions++;
+ break;
+ }
+ }
+input_failure:
+ return (nconversions != 0 ? nassigned : -1);
+match_failure:
+ return (nassigned);
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: formatting of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsscanf(buf, fmt, args);
+ va_end(args);
+ return i;
+}
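+
+/*
+ * Minimal usage sketch (the library must be built with CONFIG_SSCANF
+ * selected): parsing a dotted version string into numeric components.
+ *
+ *	unsigned int maj, min, rc;
+ *
+ *	if (sscanf("2020.07-rc3", "%u.%u-rc%u", &maj, &min, &rc) == 3)
+ *		printf("major %u minor %u rc %u\n", maj, min, rc);
+ */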
+
+#endif
diff --git a/test/lib/Makefile b/test/lib/Makefile
index 6ccc2c41bc3..b6a0a208c5e 100644
--- a/test/lib/Makefile
+++ b/test/lib/Makefile
@@ -6,6 +6,7 @@ obj-y += cmd_ut_lib.o
obj-$(CONFIG_EFI_SECURE_BOOT) += efi_image_region.o
obj-y += hexdump.o
obj-y += lmb.o
+obj-y += sscanf.o
obj-y += string.o
obj-$(CONFIG_ERRNO_STR) += test_errno_str.o
obj-$(CONFIG_UT_LIB_ASN1) += asn1.o
diff --git a/test/lib/sscanf.c b/test/lib/sscanf.c
new file mode 100644
index 00000000000..772e4b92042
--- /dev/null
+++ b/test/lib/sscanf.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2002, Uwe Bonnes
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2016, The Tor Project, Inc.
+ * Copyright (c) 2020, EPAM Systems Inc.
+ *
+ * Unit tests for sscanf() function
+ */
+
+#include <common.h>
+#include <command.h>
+#include <log.h>
+#include <test/lib.h>
+#include <test/test.h>
+#include <test/ut.h>
+
+#define EOF -1
+
+/**
+ * lib_sscanf() - unit test for sscanf()
+ * @uts: unit test state
+ *
+ * Test sscanf() with varied parameters in different combinations passed
+ * as arguments.
+ *
+ * Return: 0 - success
+ * 1 - failure
+ */
+static int lib_sscanf(struct unit_test_state *uts)
+{
+ char buffer[100], buffer1[100];
+ int result, ret;
+ static const char pname[] = " Hello World!\n";
+ int hour = 21, min = 59, sec = 20;
+ int number, number_so_far;
+ unsigned int u1, u2, u3;
+ char s1[20], s2[10], s3[10], ch;
+ int r, int1, int2;
+ long lng1;
+
+ /* check EOF */
+ strcpy(buffer, "");
+ ret = sscanf(buffer, "%d", &result);
+ ut_asserteq(ret, EOF);
+
+ /* check %x */
+ strcpy(buffer, "0x519");
+ ut_asserteq(sscanf(buffer, "%x", &result), 1);
+ ut_asserteq(result, 0x519);
+
+ strcpy(buffer, "0x51a");
+ ut_asserteq(sscanf(buffer, "%x", &result), 1);
+ ut_asserteq(result, 0x51a);
+
+ strcpy(buffer, "0x51g");
+ ut_asserteq(sscanf(buffer, "%x", &result), 1);
+ ut_asserteq(result, 0x51);
+
+ /* check strings */
+ ret = sprintf(buffer, " %s", pname);
+ ret = sscanf(buffer, "%*c%[^\n]", buffer1);
+ ut_asserteq(ret, 1);
+ ut_asserteq(strncmp(pname, buffer1, strlen(buffer1)), 0);
+
+ /* check digits */
+ ret = sprintf(buffer, "%d:%d:%d", hour, min, sec);
+ ret = sscanf(buffer, "%d%n", &number, &number_so_far);
+ ut_asserteq(ret, 1);
+ ut_asserteq(number, hour);
+ ut_asserteq(number_so_far, 2);
+
+ ret = sscanf(buffer + 2, "%*c%n", &number_so_far);
+ ut_asserteq(ret, 0);
+ ut_asserteq(number_so_far, 1);
+
+ /* Check %i */
+ strcpy(buffer, "123");
+ ret = sscanf(buffer, "%i", &result);
+ ut_asserteq(ret, 1);
+ ut_asserteq(result, 123);
+ ret = sscanf(buffer, "%d", &result);
+ ut_asserteq(ret, 1);
+ ut_asserteq(result, 123);
+
+ ut_asserteq(0, sscanf("hello world", "hello world"));
+ ut_asserteq(0, sscanf("hello world", "good bye"));
+ /* Excess data */
+ ut_asserteq(0, sscanf("hello 3", "%u", &u1)); /* have to match the start */
+ ut_asserteq(1, sscanf("3 hello", "%u", &u1)); /* but trailing is alright */
+
+ /* Numbers (ie. %u) */
+ ut_asserteq(0, sscanf("hello world 3", "hello worlb %u", &u1)); /* d vs b */
+ ut_asserteq(1, sscanf("12345", "%u", &u1));
+ ut_asserteq(12345u, u1);
+ ut_asserteq(1, sscanf("0", "%u", &u1));
+ ut_asserteq(0u, u1);
+ ut_asserteq(1, sscanf("0000", "%u", &u2));
+ ut_asserteq(0u, u2);
+ ut_asserteq(0, sscanf("A", "%u", &u1)); /* bogus number */
+
+ /* Numbers with size (eg. %2u) */
+ ut_asserteq(2, sscanf("123456", "%2u%u", &u1, &u2));
+ ut_asserteq(12u, u1);
+ ut_asserteq(3456u, u2);
+ ut_asserteq(1, sscanf("123456", "%8u", &u1));
+ ut_asserteq(123456u, u1);
+ ut_asserteq(1, sscanf("123457 ", "%8u", &u1));
+ ut_asserteq(123457u, u1);
+ ut_asserteq(3, sscanf("!12:3:456", "!%2u:%2u:%3u", &u1, &u2, &u3));
+ ut_asserteq(12u, u1);
+ ut_asserteq(3u, u2);
+ ut_asserteq(456u, u3);
+ ut_asserteq(3, sscanf("67:8:099", "%2u:%2u:%3u", &u1, &u2, &u3)); /* 0s */
+ ut_asserteq(67u, u1);
+ ut_asserteq(8u, u2);
+ ut_asserteq(99u, u3);
+ /* Arbitrary amounts of 0-padding are okay */
+ ut_asserteq(3, sscanf("12:03:000000000000000099", "%2u:%2u:%u", &u1, &u2, &u3));
+ ut_asserteq(12u, u1);
+ ut_asserteq(3u, u2);
+ ut_asserteq(99u, u3);
+
+ /* Hex (ie. %x) */
+ ut_asserteq(3, sscanf("1234 02aBcdEf ff", "%x %x %x", &u1, &u2, &u3));
+ ut_asserteq(0x1234, u1);
+ ut_asserteq(0x2ABCDEF, u2);
+ ut_asserteq(0xFF, u3);
+ /* Width works on %x */
+ ut_asserteq(3, sscanf("f00dcafe444", "%4x%4x%u", &u1, &u2, &u3));
+ ut_asserteq(0xf00d, u1);
+ ut_asserteq(0xcafe, u2);
+ ut_asserteq(444, u3);
+
+ /* Literal '%' (ie. '%%') */
+ ut_asserteq(1, sscanf("99% fresh", "%3u%% fresh", &u1));
+ ut_asserteq(99, u1);
+ ut_asserteq(0, sscanf("99 fresh", "%% %3u %s", &u1, s1));
+ ut_asserteq(1, sscanf("99 fresh", "%3u%% %s", &u1, s1));
+ ut_asserteq(2, sscanf("99 fresh", "%3u %5s %%", &u1, s1));
+ ut_asserteq(99, u1);
+ ut_asserteq_str(s1, "fresh");
+ ut_asserteq(1, sscanf("% boo", "%% %3s", s1));
+ ut_asserteq_str("boo", s1);
+
+ /* Strings (ie. %s) */
+ ut_asserteq(2, sscanf("hello", "%3s%7s", s1, s2));
+ ut_asserteq_str(s1, "hel");
+ ut_asserteq_str(s2, "lo");
+ ut_asserteq(2, sscanf("WD40", "%2s%u", s3, &u1)); /* %s%u */
+ ut_asserteq_str(s3, "WD");
+ ut_asserteq(40, u1);
+ ut_asserteq(2, sscanf("WD40", "%3s%u", s3, &u1)); /* %s%u */
+ ut_asserteq_str(s3, "WD4");
+ ut_asserteq(0, u1);
+ ut_asserteq(2, sscanf("76trombones", "%6u%9s", &u1, s1)); /* %u%s */
+ ut_asserteq(76, u1);
+ ut_asserteq_str(s1, "trombones");
+
+ ut_asserteq(3, sscanf("1.2.3", "%u.%u.%u%c", &u1, &u2, &u3, &ch));
+ ut_asserteq(4, sscanf("1.2.3 foobar", "%u.%u.%u%c", &u1, &u2, &u3, &ch));
+ ut_asserteq(' ', ch);
+
+ r = sscanf("12345 -67890 -1", "%d %ld %d", &int1, &lng1, &int2);
+ ut_asserteq(r, 3);
+ ut_asserteq(int1, 12345);
+ ut_asserteq(lng1, -67890);
+ ut_asserteq(int2, -1);
+
+ return 0;
+}
+
+LIB_TEST(lib_sscanf, 0);