aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/s390/00-INDEX2
-rw-r--r--Documentation/s390/vfio-ccw.txt303
-rw-r--r--MAINTAINERS11
-rw-r--r--arch/s390/Kbuild2
-rw-r--r--arch/s390/Kconfig39
-rw-r--r--arch/s390/configs/default_defconfig1
-rw-r--r--arch/s390/configs/gcov_defconfig1
-rw-r--r--arch/s390/configs/performance_defconfig1
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/Makefile1
-rw-r--r--arch/s390/crypto/arch_random.c31
-rw-r--r--arch/s390/crypto/paes_s390.c2
-rw-r--r--arch/s390/crypto/prng.c42
-rw-r--r--arch/s390/include/asm/Kbuild6
-rw-r--r--arch/s390/include/asm/archrandom.h69
-rw-r--r--arch/s390/include/asm/atomic_ops.h22
-rw-r--r--arch/s390/include/asm/bitops.h13
-rw-r--r--arch/s390/include/asm/cio.h18
-rw-r--r--arch/s390/include/asm/cpacf.h56
-rw-r--r--arch/s390/include/asm/cpu_mf.h6
-rw-r--r--arch/s390/include/asm/div64.h1
-rw-r--r--arch/s390/include/asm/elf.h1
-rw-r--r--arch/s390/include/asm/emergency-restart.h6
-rw-r--r--arch/s390/include/asm/facility.h6
-rw-r--r--arch/s390/include/asm/irq_regs.h1
-rw-r--r--arch/s390/include/asm/isc.h1
-rw-r--r--arch/s390/include/asm/kmap_types.h6
-rw-r--r--arch/s390/include/asm/local.h1
-rw-r--r--arch/s390/include/asm/local64.h1
-rw-r--r--arch/s390/include/asm/lowcore.h9
-rw-r--r--arch/s390/include/asm/mman.h4
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/nmi.h12
-rw-r--r--arch/s390/include/asm/page-states.h19
-rw-r--r--arch/s390/include/asm/perf_event.h4
-rw-r--r--arch/s390/include/asm/pgtable.h16
-rw-r--r--arch/s390/include/asm/pkey.h21
-rw-r--r--arch/s390/include/asm/processor.h14
-rw-r--r--arch/s390/include/asm/setup.h6
-rw-r--r--arch/s390/include/asm/sparsemem.h2
-rw-r--r--arch/s390/include/asm/spinlock.h45
-rw-r--r--arch/s390/include/asm/spinlock_types.h6
-rw-r--r--arch/s390/include/asm/switch_to.h3
-rw-r--r--arch/s390/include/asm/sysinfo.h12
-rw-r--r--arch/s390/include/asm/thread_info.h12
-rw-r--r--arch/s390/include/uapi/asm/Kbuild19
-rw-r--r--arch/s390/include/uapi/asm/errno.h11
-rw-r--r--arch/s390/include/uapi/asm/fcntl.h1
-rw-r--r--arch/s390/include/uapi/asm/guarded_storage.h77
-rw-r--r--arch/s390/include/uapi/asm/ioctl.h1
-rw-r--r--arch/s390/include/uapi/asm/mman.h6
-rw-r--r--arch/s390/include/uapi/asm/param.h6
-rw-r--r--arch/s390/include/uapi/asm/pkey.h19
-rw-r--r--arch/s390/include/uapi/asm/poll.h1
-rw-r--r--arch/s390/include/uapi/asm/resource.h13
-rw-r--r--arch/s390/include/uapi/asm/sockios.h6
-rw-r--r--arch/s390/include/uapi/asm/termbits.h6
-rw-r--r--arch/s390/include/uapi/asm/unistd.h2
-rw-r--r--arch/s390/kernel/Makefile6
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/compat_wrapper.c1
-rw-r--r--arch/s390/kernel/crash_dump.c15
-rw-r--r--arch/s390/kernel/early.c66
-rw-r--r--arch/s390/kernel/entry.S28
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/guarded_storage.c128
-rw-r--r--arch/s390/kernel/head.S1
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/kdebugfs.c15
-rw-r--r--arch/s390/kernel/machine_kexec.c13
-rw-r--r--arch/s390/kernel/nmi.c19
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c128
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c148
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c7
-rw-r--r--arch/s390/kernel/process.c7
-rw-r--r--arch/s390/kernel/processor.c16
-rw-r--r--arch/s390/kernel/ptrace.c132
-rw-r--r--arch/s390/kernel/setup.c18
-rw-r--r--arch/s390/kernel/smp.c43
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/sysinfo.c98
-rw-r--r--arch/s390/kernel/topology.c6
-rw-r--r--arch/s390/kvm/interrupt.c4
-rw-r--r--arch/s390/kvm/kvm-s390.c6
-rw-r--r--arch/s390/lib/spinlock.c84
-rw-r--r--arch/s390/mm/gmap.c37
-rw-r--r--arch/s390/mm/gup.c2
-rw-r--r--arch/s390/mm/mmap.c84
-rw-r--r--arch/s390/mm/page-states.c3
-rw-r--r--arch/s390/mm/pageattr.c10
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/s390/mm/pgtable.c153
-rw-r--r--arch/s390/pci/pci.c22
-rw-r--r--drivers/char/hw_random/Kconfig14
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/s390-trng.c268
-rw-r--r--drivers/iommu/Kconfig8
-rw-r--r--drivers/s390/block/dasd_3990_erp.c5
-rw-r--r--drivers/s390/block/dasd_eckd.c16
-rw-r--r--drivers/s390/block/dasd_int.h2
-rw-r--r--drivers/s390/cio/Makefile3
-rw-r--r--drivers/s390/cio/cio.c69
-rw-r--r--drivers/s390/cio/cio.h1
-rw-r--r--drivers/s390/cio/device_fsm.c54
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c842
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.h42
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c308
-rw-r--r--drivers/s390/cio/vfio_ccw_fsm.c203
-rw-r--r--drivers/s390/cio/vfio_ccw_ops.c425
-rw-r--r--drivers/s390/cio/vfio_ccw_private.h96
-rw-r--r--drivers/s390/crypto/pkey_api.c64
-rw-r--r--include/uapi/linux/elf.h2
-rw-r--r--include/uapi/linux/vfio.h18
-rw-r--r--include/uapi/linux/vfio_ccw.h24
115 files changed, 4223 insertions, 561 deletions
diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX
index 9189535f6cd2..317f0378ae01 100644
--- a/Documentation/s390/00-INDEX
+++ b/Documentation/s390/00-INDEX
@@ -22,5 +22,7 @@ qeth.txt
- HiperSockets Bridge Port Support.
s390dbf.txt
- information on using the s390 debug feature.
+vfio-ccw.txt
+ - information on the vfio-ccw I/O subchannel driver.
zfcpdump.txt
- information on the s390 SCSI dump tool.
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.txt
new file mode 100644
index 000000000000..90b3dfead81b
--- /dev/null
+++ b/Documentation/s390/vfio-ccw.txt
@@ -0,0 +1,303 @@
+vfio-ccw: the basic infrastructure
+==================================
+
+Introduction
+------------
+
+Here we describe the vfio support for I/O subchannel devices for
+Linux/s390. Motivation for vfio-ccw is to passthrough subchannels to a
+virtual machine, while vfio is the means.
+
+Unlike other hardware architectures, s390 has defined a unified
+I/O access method, the so-called Channel I/O. It has its own access
+patterns:
+- Channel programs run asynchronously on a separate (co)processor.
+- The channel subsystem will access any memory designated by the caller
+ in the channel program directly, i.e. there is no iommu involved.
+Thus when we introduce vfio support for these devices, we realize it
+with a mediated device (mdev) implementation. The vfio mdev will be
+added to an iommu group, so as to make itself able to be managed by the
+vfio framework. And we add read/write callbacks for special vfio I/O
+regions to pass the channel programs from the mdev to its parent device
+(the real I/O subchannel device) to do further address translation and
+to perform I/O instructions.
+
+This document does not intend to explain the s390 I/O architecture in
+every detail. More information/reference could be found here:
+- A good start to know Channel I/O in general:
+ https://en.wikipedia.org/wiki/Channel_I/O
+- s390 architecture:
+ s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+- The existing Qemu code which implements a simple emulated channel
+ subsystem could also be a good reference. It makes it easier to follow
+ the flow.
+ qemu/hw/s390x/css.c
+
+For vfio mediated device framework:
+- Documentation/vfio-mediated-device.txt
+
+Motivation of vfio-ccw
+----------------------
+
+Currently, a guest virtualized via qemu/kvm on s390 only sees
+paravirtualized virtio devices via the "Virtio Over Channel I/O
+(virtio-ccw)" transport. This makes virtio devices discoverable via
+standard operating system algorithms for handling channel devices.
+
+However this is not enough. On s390 for the majority of devices, which
+use the standard Channel I/O based mechanism, we also need to provide
+the functionality of passing through them to a Qemu virtual machine.
+This includes devices that don't have a virtio counterpart (e.g. tape
+drives) or that have specific characteristics which guests want to
+exploit.
+
+For passing a device to a guest, we want to use the same interface as
+everybody else, namely vfio. Thus, we would like to introduce vfio
+support for channel devices. And we would like to name this new vfio
+device "vfio-ccw".
+
+Access patterns of CCW devices
+------------------------------
+
+s390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the
+systems. Though the s390 hardware platform knows about a huge variety of
+different peripheral attachments like disk devices (aka. DASDs), tapes,
+communication controllers, etc., they can all be accessed by a well
+defined access method and they present I/O completion in a unified
+way: I/O interruptions.
+
+All I/O requires the use of channel command words (CCWs). A CCW is an
+instruction to a specialized I/O channel processor. A channel program is
+a sequence of CCWs which are executed by the I/O channel subsystem. To
+issue a channel program to the channel subsystem, it is required to
+build an operation request block (ORB), which can be used to point out
+the format of the CCW and other control information to the system. The
+operating system signals the I/O channel subsystem to begin executing
+the channel program with a SSCH (start sub-channel) instruction. The
+central processor is then free to proceed with non-I/O instructions
+until interrupted. The I/O completion result is received by the
+interrupt handler in the form of interrupt response block (IRB).
+
+Back to vfio-ccw, in short:
+- ORBs and channel programs are built in guest kernel (with guest
+ physical addresses).
+- ORBs and channel programs are passed to the host kernel.
+- Host kernel translates the guest physical addresses to real addresses
+ and starts the I/O with issuing a privileged Channel I/O instruction
+ (e.g. SSCH).
+- channel programs run asynchronously on a separate processor.
+- I/O completion will be signaled to the host with I/O interruptions.
+ And it will be copied as IRB to user space to pass it back to the
+ guest.
+
+Physical vfio ccw device and its child mdev
+-------------------------------------------
+
+As mentioned above, we realize vfio-ccw with a mdev implementation.
+
+Channel I/O does not have IOMMU hardware support, so the physical
+vfio-ccw device does not have an IOMMU level translation or isolation.
+
+Sub-channel I/O instructions are all privileged instructions. When
+handling the I/O instruction interception, vfio-ccw performs software
+policing and translation of the channel program before it gets sent
+to hardware.
+
+Within this implementation, we have two drivers for two types of
+devices:
+- The vfio_ccw driver for the physical subchannel device.
+ This is an I/O subchannel driver for the real subchannel device. It
+ realizes a group of callbacks and registers to the mdev framework as a
+ parent (physical) device. As a consequence, mdev provides vfio_ccw a
+ generic interface (sysfs) to create mdev devices. A vfio mdev could be
+ created by vfio_ccw then and added to the mediated bus. It is the vfio
+ device that is added to an IOMMU group and a vfio group.
+ vfio_ccw also provides an I/O region to accept channel program
+ requests from user space and store I/O interrupt results for user
+ space to retrieve. To notify user space of an I/O completion, it offers
+ an interface to set up an eventfd for asynchronous signaling.
+
+- The vfio_mdev driver for the mediated vfio ccw device.
+ This is provided by the mdev framework. It is a vfio device driver for
+ the mdev that is created by vfio_ccw.
+ It realizes a group of vfio device driver callbacks, adds itself to a
+ vfio group, and registers itself to the mdev framework as a mdev
+ driver.
+ It uses a vfio iommu backend that uses the existing map and unmap
+ ioctls, but rather than programming them into an IOMMU for a device,
+ it simply stores the translations for use by later requests. This
+ means that a device programmed in a VM with guest physical addresses
+ can have the vfio kernel convert that address to process virtual
+ address, pin the page and program the hardware with the host physical
+ address in one step.
+ For a mdev, the vfio iommu backend will not pin the pages during the
+ VFIO_IOMMU_MAP_DMA ioctl. Mdev framework will only maintain a database
+ of the iova<->vaddr mappings in this operation. The vfio_pin_pages
+ and vfio_unpin_pages interfaces are exported from the vfio iommu
+ backend for the physical devices to pin and unpin pages on demand.
+
+Below is a high-level block diagram.
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ccw.ko |<-> subchannel
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
+
+The process of how these work together.
+1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
+ physical device (with callbacks) to mdev framework.
+ When vfio_ccw probes the subchannel device, it registers device
+ pointer and callbacks to the mdev framework. Mdev related file nodes
+ under the device node in sysfs would be created for the subchannel
+ device, namely 'mdev_create', 'mdev_destroy' and
+ 'mdev_supported_types'.
+2. Create a mediated vfio ccw device.
+ Using the 'mdev_create' sysfs file, we need to manually create one (and
+ only one for our case) mediated device.
+3. vfio_mdev.ko drives the mediated ccw device.
+ vfio_mdev is also the vfio device driver. It will probe the mdev and
+ add it to an iommu_group and a vfio_group. Then we could pass through
+ the mdev to a guest.
+
+vfio-ccw I/O region
+-------------------
+
+An I/O region is used to accept channel program request from user
+space and store I/O interrupt result for user space to retrieve. The
+definition of the region is:
+
+struct ccw_io_region {
+#define ORB_AREA_SIZE 12
+ __u8 orb_area[ORB_AREA_SIZE];
+#define SCSW_AREA_SIZE 12
+ __u8 scsw_area[SCSW_AREA_SIZE];
+#define IRB_AREA_SIZE 96
+ __u8 irb_area[IRB_AREA_SIZE];
+ __u32 ret_code;
+} __packed;
+
+While starting an I/O request, orb_area should be filled with the
+guest ORB, and scsw_area should be filled with the SCSW of the Virtual
+Subchannel.
+
+irb_area stores the I/O result.
+
+ret_code stores a return code for each access of the region.
+
+vfio-ccw patches overview
+-------------------------
+
+For now, our patches are rebased on the latest mdev implementation.
+vfio-ccw follows what vfio-pci did on the s390 platform and uses
+vfio-iommu-type1 as the vfio iommu backend. It's a good start to launch
+the code review for vfio-ccw. Note that the implementation is far from
+complete yet; but we'd like to get feedback for the general
+architecture.
+
+* CCW translation APIs
+- Description:
+ These introduce a group of APIs (start with 'cp_') to do CCW
+ translation. The CCWs passed in by a user space program are
+ organized with their guest physical memory addresses. These APIs
+ will copy the CCWs into the kernel space, and assemble a runnable
+ kernel channel program by updating the guest physical addresses with
+ their corresponding host physical addresses.
+- Patches:
+ vfio: ccw: introduce channel program interfaces
+
+* vfio_ccw device driver
+- Description:
+ The following patches utilize the CCW translation APIs and introduce
+ vfio_ccw, which is the driver for the I/O subchannel devices you want
+ to pass through.
+ vfio_ccw implements the following vfio ioctls:
+ VFIO_DEVICE_GET_INFO
+ VFIO_DEVICE_GET_IRQ_INFO
+ VFIO_DEVICE_GET_REGION_INFO
+ VFIO_DEVICE_RESET
+ VFIO_DEVICE_SET_IRQS
+ This provides an I/O region, so that the user space program can pass a
+ channel program to the kernel, to do further CCW translation before
+ issuing them to a real device.
+ This also provides the SET_IRQ ioctl to set up an event notifier to
+ notify the user space program of the I/O completion in an asynchronous
+ way.
+- Patches:
+ vfio: ccw: basic implementation for vfio_ccw driver
+ vfio: ccw: introduce ccw_io_region
+ vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl
+ vfio: ccw: realize VFIO_DEVICE_RESET ioctl
+ vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls
+
+The user of vfio-ccw is not limited to Qemu, while Qemu is definitely a
+good example to help understand how these patches work. Here is a little
+bit more detail on how an I/O request triggered by the Qemu guest will be
+handled (without error handling).
+
+Explanation:
+Q1-Q7: Qemu side process.
+K1-K6: Kernel side process.
+
+Q1. Get I/O region info during initialization.
+Q2. Setup event notifier and handler to handle I/O completion.
+
+... ...
+
+Q3. Intercept a ssch instruction.
+Q4. Write the guest channel program and ORB to the I/O region.
+ K1. Copy from guest to kernel.
+ K2. Translate the guest channel program to a host kernel space
+ channel program, which becomes runnable for a real device.
+ K3. With the necessary information contained in the orb passed in
+ by Qemu, issue the ccwchain to the device.
+ K4. Return the ssch CC code.
+Q5. Return the CC code to the guest.
+
+... ...
+
+ K5. Interrupt handler gets the I/O result and writes the result to
+ the I/O region.
+ K6. Signal Qemu to retrieve the result.
+Q6. Get the signal and event handler reads out the result from the I/O
+ region.
+Q7. Update the irb for the guest.
+
+Limitations
+-----------
+
+The current vfio-ccw implementation focuses on supporting basic commands
+needed to implement block device functionality (read/write) of DASD/ECKD
+device only. Some commands may need special handling in the future, for
+example, anything related to path grouping.
+
+DASD is a kind of storage device, while ECKD is a data recording format.
+More information for DASD and ECKD could be found here:
+https://en.wikipedia.org/wiki/Direct-access_storage_device
+https://en.wikipedia.org/wiki/Count_key_data
+
+Together with the corresponding work in Qemu, we can bring the passed
+through DASD/ECKD device online in a guest now and use it as a block
+device.
+
+Reference
+---------
+1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
+2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
+3. https://en.wikipedia.org/wiki/Channel_I/O
+4. Documentation/s390/cds.txt
+5. Documentation/vfio.txt
+6. Documentation/vfio-mediated-device.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index 33ecf266570f..5f91365ebc0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7201,6 +7201,7 @@ S: Supported
F: Documentation/s390/kvm.txt
F: arch/s390/include/asm/kvm*
F: arch/s390/kvm/
+F: arch/s390/mm/gmap.c
KERNEL VIRTUAL MACHINE (KVM) FOR ARM
M: Christoffer Dall <christoffer.dall@linaro.org>
@@ -10896,6 +10897,16 @@ W: http://www.ibm.com/developerworks/linux/linux390/
S: Supported
F: drivers/iommu/s390-iommu.c
+S390 VFIO-CCW DRIVER
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+L: linux-s390@vger.kernel.org
+L: kvm@vger.kernel.org
+S: Supported
+F: drivers/s390/cio/vfio_ccw*
+F: Documentation/s390/vfio-ccw.txt
+F: include/uapi/linux/vfio_ccw.h
+
S3C24XX SD/MMC Driver
M: Ben Dooks <ben-linux@fluff.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e256592eb66e..eae2c64cf69d 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,7 +1,7 @@
obj-y += kernel/
obj-y += mm/
obj-$(CONFIG_KVM) += kvm/
-obj-$(CONFIG_CRYPTO_HW) += crypto/
+obj-y += crypto/
obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b8b143432381..e161fafb495b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -105,6 +105,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
@@ -123,7 +124,6 @@ config S390
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
- select HAVE_ARCH_EARLY_PFN_TO_NID
select HAVE_ARCH_JUMP_LABEL
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
@@ -506,6 +506,21 @@ source kernel/Kconfig.preempt
source kernel/Kconfig.hz
+config ARCH_RANDOM
+ def_bool y
+ prompt "s390 architectural random number generation API"
+ help
+ Enable the s390 architectural random number generation API
+ to provide random data for all consumers within the Linux
+ kernel.
+
+ When enabled the arch_random_* functions declared in linux/random.h
+ are implemented. The implementation is based on the s390 CPACF
+ instruction subfunction TRNG which provides a real true random
+ number generator.
+
+ If unsure, say Y.
+
endmenu
menu "Memory setup"
@@ -536,6 +551,16 @@ config FORCE_MAX_ZONEORDER
source "mm/Kconfig"
+config MAX_PHYSMEM_BITS
+ int "Maximum size of supported physical memory in bits (42-53)"
+ range 42 53
+ default "46"
+ help
+ This option specifies the maximum supported size of physical memory
+ in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB).
+ Increasing the number of bits also increases the kernel image size.
+ By default 46 bits (64TB) are supported.
+
config PACK_STACK
def_bool y
prompt "Pack kernel stack"
@@ -613,7 +638,7 @@ if PCI
config PCI_NR_FUNCTIONS
int "Maximum number of PCI functions (1-4096)"
range 1 4096
- default "64"
+ default "128"
help
This allows you to specify the maximum number of PCI functions which
this kernel will support.
@@ -671,6 +696,16 @@ config EADM_SCH
To compile this driver as a module, choose M here: the
module will be called eadm_sch.
+config VFIO_CCW
+ def_tristate n
+ prompt "Support for VFIO-CCW subchannels"
+ depends on S390_CCW_IOMMU && VFIO_MDEV
+ help
+ This driver allows usage of I/O subchannels via VFIO-CCW.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vfio_ccw.
+
endmenu
menu "Dump support"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 4b176fe83da4..a5039fa89314 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -73,6 +73,7 @@ CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_PCI_DEBUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 0de46cc397f6..83970b5afb2b 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -72,6 +72,7 @@ CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index e167557b434c..fbc6542aaf59 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -70,6 +70,7 @@ CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 4366a3e3e754..e23d97c13735 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -35,7 +35,6 @@ CONFIG_SCSI_ENCLOSURE=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_SRP_ATTRS=y
CONFIG_ZFCP=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 402c530c6da5..678d9863e3f0 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -10,5 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
new file mode 100644
index 000000000000..9317b3e645e2
--- /dev/null
+++ b/arch/s390/crypto/arch_random.c
@@ -0,0 +1,31 @@
+/*
+ * s390 arch random implementation.
+ *
+ * Copyright IBM Corp. 2017
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/static_key.h>
+#include <asm/cpacf.h>
+
+DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
+
+atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(s390_arch_random_counter);
+
+static int __init s390_arch_random_init(void)
+{
+ /* check if subfunction CPACF_PRNO_TRNG is available */
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ static_branch_enable(&s390_arch_random_available);
+
+ return 0;
+}
+arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 716b17238599..a4e903ed7e21 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -616,7 +616,7 @@ out_err:
module_init(paes_s390_init);
module_exit(paes_s390_fini);
-MODULE_ALIAS_CRYPTO("aes-all");
+MODULE_ALIAS_CRYPTO("paes");
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 5a3ec04a7082..3e47c4a0f18b 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -81,7 +81,7 @@ struct prng_ws_s {
u64 byte_counter;
};
-struct ppno_ws_s {
+struct prno_ws_s {
u32 res;
u32 reseed_counter;
u64 stream_bytes;
@@ -93,7 +93,7 @@ struct prng_data_s {
struct mutex mutex;
union {
struct prng_ws_s prngws;
- struct ppno_ws_s ppnows;
+ struct prno_ws_s prnows;
};
u8 *buf;
u32 rest;
@@ -306,12 +306,12 @@ static int __init prng_sha512_selftest(void)
0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
u8 buf[sizeof(random)];
- struct ppno_ws_s ws;
+ struct prno_ws_s ws;
memset(&ws, 0, sizeof(ws));
/* initial seed */
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
&ws, NULL, 0, seed, sizeof(seed));
/* check working states V and C */
@@ -324,9 +324,9 @@ static int __init prng_sha512_selftest(void)
}
/* generate random bytes */
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
&ws, buf, sizeof(buf), NULL, 0);
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
&ws, buf, sizeof(buf), NULL, 0);
/* check against expected data */
@@ -374,16 +374,16 @@ static int __init prng_sha512_instantiate(void)
/* followed by 16 bytes of unique nonce */
get_tod_clock_ext(seed + 64 + 32);
- /* initial seed of the ppno drng */
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
- &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+ /* initial seed of the prno drng */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &prng_data->prnows, NULL, 0, seed, sizeof(seed));
/* if fips mode is enabled, generate a first block of random
bytes for the FIPS 140-2 Conditional Self Test */
if (fips_enabled) {
prng_data->prev = prng_data->buf + prng_chunk_size;
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
- &prng_data->ppnows,
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &prng_data->prnows,
prng_data->prev, prng_chunk_size, NULL, 0);
}
@@ -412,9 +412,9 @@ static int prng_sha512_reseed(void)
if (ret != sizeof(seed))
return ret;
- /* do a reseed of the ppno drng with this bytestring */
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
- &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+ /* do a reseed of the prno drng with this bytestring */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &prng_data->prnows, NULL, 0, seed, sizeof(seed));
return 0;
}
@@ -425,15 +425,15 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
int ret;
/* reseed needed ? */
- if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+ if (prng_data->prnows.reseed_counter > prng_reseed_limit) {
ret = prng_sha512_reseed();
if (ret)
return ret;
}
- /* PPNO generate */
- cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
- &prng_data->ppnows, buf, nbytes, NULL, 0);
+ /* PRNO generate */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &prng_data->prnows, buf, nbytes, NULL, 0);
/* FIPS 140-2 Conditional Self Test */
if (fips_enabled) {
@@ -653,7 +653,7 @@ static ssize_t prng_counter_show(struct device *dev,
if (mutex_lock_interruptible(&prng_data->mutex))
return -ERESTARTSYS;
if (prng_mode == PRNG_MODE_SHA512)
- counter = prng_data->ppnows.stream_bytes;
+ counter = prng_data->prnows.stream_bytes;
else
counter = prng_data->prngws.byte_counter;
mutex_unlock(&prng_data->mutex);
@@ -774,8 +774,8 @@ static int __init prng_init(void)
/* choose prng mode */
if (prng_mode != PRNG_MODE_TDES) {
- /* check for MSA5 support for PPNO operations */
- if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+ /* check for MSA5 support for PRNO operations */
+ if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
if (prng_mode == PRNG_MODE_SHA512) {
pr_err("The prng module cannot "
"start in SHA-512 mode\n");
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 8aea32fe8bd2..7e3481eb2174 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,8 +1,14 @@
generic-y += asm-offsets.h
generic-y += clkdev.h
generic-y += dma-contiguous.h
+generic-y += div64.h
+generic-y += emergency-restart.h
generic-y += export.h
+generic-y += irq_regs.h
generic-y += irq_work.h
+generic-y += kmap_types.h
+generic-y += local.h
+generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
new file mode 100644
index 000000000000..6033901a40b2
--- /dev/null
+++ b/arch/s390/include/asm/archrandom.h
@@ -0,0 +1,69 @@
+/*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_ARCHRANDOM_H
+#define _ASM_S390_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <linux/static_key.h>
+#include <linux/atomic.h>
+#include <asm/cpacf.h>
+
+DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
+extern atomic64_t s390_arch_random_counter;
+
+static void s390_arch_random_generate(u8 *buf, unsigned int nbytes)
+{
+ cpacf_trng(NULL, 0, buf, nbytes);
+ atomic64_add(nbytes, &s390_arch_random_counter);
+}
+
+static inline bool arch_has_random(void)
+{
+ if (static_branch_likely(&s390_arch_random_available))
+ return true;
+ return false;
+}
+
+static inline bool arch_has_random_seed(void)
+{
+ return arch_has_random();
+}
+
+static inline bool arch_get_random_long(unsigned long *v)
+{
+ if (static_branch_likely(&s390_arch_random_available)) {
+ s390_arch_random_generate((u8 *)v, sizeof(*v));
+ return true;
+ }
+ return false;
+}
+
+static inline bool arch_get_random_int(unsigned int *v)
+{
+ if (static_branch_likely(&s390_arch_random_available)) {
+ s390_arch_random_generate((u8 *)v, sizeof(*v));
+ return true;
+ }
+ return false;
+}
+
+static inline bool arch_get_random_seed_long(unsigned long *v)
+{
+ return arch_get_random_long(v);
+}
+
+static inline bool arch_get_random_seed_int(unsigned int *v)
+{
+ return arch_get_random_int(v);
+}
+
+#endif /* CONFIG_ARCH_RANDOM */
+#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index ac9e2b939d04..ba6d29412344 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -111,20 +111,22 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
static inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
- asm volatile(
- " cs %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+Q" (*ptr)
- : [new] "d" (new) : "cc", "memory");
- return old;
+ return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+ return __sync_bool_compare_and_swap(ptr, old, new);
}
static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
- asm volatile(
- " csg %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+Q" (*ptr)
- : [new] "d" (new) : "cc", "memory");
- return old;
+ return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+{
+ return __sync_bool_compare_and_swap(ptr, old, new);
}
#endif /* __ARCH_S390_ATOMIC_OPS__ */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index d92047da5ccb..99902b7b9f0c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -15,14 +15,6 @@
* end up numbered:
* |63..............0|127............64|191...........128|255...........192|
*
- * There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit array layouts, but byte-oriented:
- * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
- *
- * The main difference is that bit 3-5 in the bit number field needs to be
- * reversed compared to the big-endian bit fields. This can be achieved by
- * XOR with 0x38.
- *
* We also have special functions which work with an MSB0 encoding.
* The bits are numbered:
* |0..............63|64............127|128...........191|192...........255|
@@ -253,6 +245,11 @@ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
unsigned long offset);
+#define for_each_set_bit_inv(bit, addr, size) \
+ for ((bit) = find_first_bit_inv((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit_inv((addr), (size), (bit) + 1))
+
static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index f7ed88cc066e..7a38ca85190b 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -33,6 +33,24 @@ struct ccw1 {
__u32 cda;
} __attribute__ ((packed,aligned(8)));
+/**
+ * struct ccw0 - channel command word
+ * @cmd_code: command code
+ * @cda: data address
+ * @flags: flags, like IDA addressing, etc.
+ * @reserved: will be ignored
+ * @count: byte count
+ *
+ * The format-0 ccw structure.
+ */
+struct ccw0 {
+ __u8 cmd_code;
+ __u32 cda : 24;
+ __u8 flags;
+ __u8 reserved;
+ __u16 count;
+} __packed __aligned(8);
+
#define CCW_FLAG_DC 0x80
#define CCW_FLAG_CC 0x40
#define CCW_FLAG_SLI 0x20
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index e2dfbf280d12..e06f2556b316 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -25,7 +25,8 @@
#define CPACF_KMO 0xb92b /* MSA4 */
#define CPACF_PCC 0xb92c /* MSA4 */
#define CPACF_KMCTR 0xb92d /* MSA4 */
-#define CPACF_PPNO 0xb93c /* MSA5 */
+#define CPACF_PRNO 0xb93c /* MSA5 */
+#define CPACF_KMA 0xb929 /* MSA8 */
/*
* En/decryption modifier bits
@@ -123,12 +124,14 @@
#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
/*
- * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
* instruction
*/
-#define CPACF_PPNO_QUERY 0x00
-#define CPACF_PPNO_SHA512_DRNG_GEN 0x03
-#define CPACF_PPNO_SHA512_DRNG_SEED 0x83
+#define CPACF_PRNO_QUERY 0x00
+#define CPACF_PRNO_SHA512_DRNG_GEN 0x03
+#define CPACF_PRNO_SHA512_DRNG_SEED 0x83
+#define CPACF_PRNO_TRNG_Q_R2C_RATIO 0x70
+#define CPACF_PRNO_TRNG 0x72
typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
@@ -149,8 +152,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
asm volatile(
" spm 0\n" /* pckmo doesn't change the cc */
- /* Parameter registers are ignored, but may not be 0 */
- "0: .insn rrf,%[opc] << 16,2,2,2,0\n"
+ /* Parameter regs are ignored, but must be nonzero and unique */
+ "0: .insn rrf,%[opc] << 16,2,4,6,0\n"
" brc 1,0b\n" /* handle partial completion */
: "=m" (*mask)
: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
@@ -173,7 +176,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode)
case CPACF_PCC:
case CPACF_KMCTR:
return test_facility(77); /* check for MSA4 */
- case CPACF_PPNO:
+ case CPACF_PRNO:
return test_facility(57); /* check for MSA5 */
default:
BUG();
@@ -373,18 +376,18 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
}
/**
- * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION)
* instruction
- * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines
+ * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines
* @param: address of parameter block; see POP for details on each func
* @dest: address of destination memory area
* @dest_len: size of destination memory area in bytes
* @seed: address of seed data
* @seed_len: size of seed data in bytes
*/
-static inline void cpacf_ppno(unsigned long func, void *param,
- u8 *dest, long dest_len,
- const u8 *seed, long seed_len)
+static inline void cpacf_prno(unsigned long func, void *param,
+ u8 *dest, unsigned long dest_len,
+ const u8 *seed, unsigned long seed_len)
{
register unsigned long r0 asm("0") = (unsigned long) func;
register unsigned long r1 asm("1") = (unsigned long) param;
@@ -398,7 +401,32 @@ static inline void cpacf_ppno(unsigned long func, void *param,
" brc 1,0b\n" /* handle partial completion */
: [dst] "+a" (r2), [dlen] "+d" (r3)
: [fc] "d" (r0), [pba] "a" (r1),
- [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
+ [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction
+ * @ucbuf: buffer for unconditioned data
+ * @ucbuf_len: amount of unconditioned data to fetch in bytes
+ * @cbuf: buffer for conditioned data
+ * @cbuf_len: amount of conditioned data to fetch in bytes
+ */
+static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
+ u8 *cbuf, unsigned long cbuf_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG;
+ register unsigned long r2 asm("2") = (unsigned long) ucbuf;
+ register unsigned long r3 asm("3") = (unsigned long) ucbuf_len;
+ register unsigned long r4 asm("4") = (unsigned long) cbuf;
+ register unsigned long r5 asm("5") = (unsigned long) cbuf_len;
+
+ asm volatile (
+ "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3),
+ [cbuf] "+a" (r4), [cbuflen] "+d" (r5)
+ : [fc] "d" (r0), [opc] "i" (CPACF_PRNO)
: "cc", "memory");
}
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index d1e0707310fd..05480e4cc5ca 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -20,9 +20,11 @@
#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */
#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */
#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
+#define CPU_MF_INT_CF_MTDA (1 << 15) /* loss of MT ctr. data alert */
#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
-#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \
+ CPU_MF_INT_CF_LCDA)
#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
CPU_MF_INT_SF_LSDA)
@@ -172,7 +174,7 @@ static inline int lcctl(u64 ctl)
/* Extract CPU counter */
static inline int __ecctr(u64 ctr, u64 *content)
{
- register u64 _content asm("4") = 0;
+ u64 _content;
int cc;
asm volatile (
diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/s390/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880b3cc1..e8f623041769 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
#define HWCAP_S390_VXRS 2048
#define HWCAP_S390_VXRS_BCD 4096
#define HWCAP_S390_VXRS_EXT 8192
+#define HWCAP_S390_GS 16384
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/s390/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 09b406db7529..cb60d5c5755d 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -8,14 +8,11 @@
#define __ASM_FACILITY_H
#include <generated/facilities.h>
-
-#ifndef __ASSEMBLY__
-
#include <linux/string.h>
#include <linux/preempt.h>
#include <asm/lowcore.h>
-#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */
+#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
static inline int __test_facility(unsigned long nr, void *facilities)
{
@@ -72,5 +69,4 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_enable();
}
-#endif /* __ASSEMBLY__ */
#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/s390/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 68d7d68300f2..8a0b721a9b8d 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -16,6 +16,7 @@
#define CONSOLE_ISC 1 /* console I/O subchannel */
#define EADM_SCH_ISC 4 /* EADM subchannels */
#define CHSC_SCH_ISC 7 /* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC /* VFIO-CCW I/O subchannels */
/* Adapter interrupts. */
#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */
#define PCI_ISC 2 /* PCI I/O subchannels */
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
deleted file mode 100644
index 0a88622339ee..000000000000
--- a/arch/s390/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/s390/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/s390/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0e95c0..8a5b082797f8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
__u64 stfle_fac_list[32]; /* 0x0f00 */
__u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
- /* Pointer to vector register save area */
- __u64 vector_save_area_addr; /* 0x11b0 */
+ /* Pointer to the machine check extended save area */
+ __u64 mcesad; /* 0x11b0 */
/* 64 bit extparam used for pfault/diag 250: defined by architecture */
__u64 ext_params2; /* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
/* Transaction abort diagnostic block */
__u8 pgm_tdb[256]; /* 0x1800 */
- __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */
-
- /* Software defined save area for vector registers */
- __u8 vector_save_area[1024]; /* 0x1c00 */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed;
#define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index b55a59e1d134..b79813d9cf68 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -8,8 +8,4 @@
#include <uapi/asm/mman.h>
-#ifndef __ASSEMBLY__
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
-#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
-#endif
#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bea785d7f853..bd6f30304518 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,6 +22,8 @@ typedef struct {
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
+ /* The mmu context uses CMMA. */
+ unsigned int use_cmma:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fa2bf69be182..8712e11bead4 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -28,6 +28,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
+ mm->context.use_cmma = 0;
#endif
switch (mm->context.asce_limit) {
case 1UL << 42:
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd910386a..e3e8895f5d3e 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
u64 ie : 1; /* 32 indirect storage error */
u64 ar : 1; /* 33 access register validity */
u64 da : 1; /* 34 delayed access exception */
- u64 : 7; /* 35-41 */
+ u64 : 1; /* 35 */
+ u64 gs : 1; /* 36 guarded storage registers */
+ u64 : 5; /* 37-41 */
u64 pr : 1; /* 42 tod programmable register validity */
u64 fc : 1; /* 43 fp control register validity */
u64 ap : 1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
};
};
+#define MCESA_ORIGIN_MASK (~0x3ffUL)
+#define MCESA_LC_MASK (0xfUL)
+
+struct mcesa {
+ u8 vector_save_area[1024];
+ u8 guarded_storage_save_area[32];
+};
+
struct pt_regs;
extern void s390_handle_mcck(void);
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
new file mode 100644
index 000000000000..42267a2fe29e
--- /dev/null
+++ b/arch/s390/include/asm/page-states.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright IBM Corp. 2017
+ * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+ */
+
+#ifndef PAGE_STATES_H
+#define PAGE_STATES_H
+
+#define ESSA_GET_STATE 0
+#define ESSA_SET_STABLE 1
+#define ESSA_SET_UNUSED 2
+#define ESSA_SET_VOLATILE 3
+#define ESSA_SET_POT_VOLATILE 4
+#define ESSA_SET_STABLE_RESIDENT 5
+#define ESSA_SET_STABLE_IF_RESIDENT 6
+
+#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
+
+#endif
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index c64c0befd3f3..dd32beb9d30c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,7 +1,7 @@
/*
* Performance event support - s390 specific definitions.
*
- * Copyright IBM Corp. 2009, 2013
+ * Copyright IBM Corp. 2009, 2017
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
@@ -47,7 +47,7 @@ struct perf_sf_sde_regs {
};
/* Perf PMU definitions for the counter facility */
-#define PERF_CPUM_CF_MAX_CTR 256
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
/* Perf PMU definitions for the sampling facility */
#define PERF_CPUM_SF_MAX_CTR 2
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ecec682bb516..e6e3b887bee3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -372,10 +372,12 @@ static inline int is_module_addr(void *addr)
#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
/* Guest Page State used for virtualization */
-#define _PGSTE_GPS_ZERO 0x0000000080000000UL
-#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
-#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
-#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL
+#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK
/*
* A user page table pointer has the space-switch-event bit, the
@@ -1041,6 +1043,12 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char *key);
+int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
+ unsigned long bits, unsigned long value);
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste);
+
/*
* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index b48aef4188f6..4c484590d858 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -87,4 +87,25 @@ int pkey_findcard(const struct pkey_seckey *seckey,
int pkey_skey2pkey(const struct pkey_seckey *seckey,
struct pkey_protkey *protkey);
+/*
+ * Verify that the given secure key is usable with the pkey module.
+ * Check for the correct key type and check that at least one crypto
+ * card is able to handle this key (master key or old master key
+ * verification pattern matches).
+ * Return some info about the key: keysize in bits, keytype (currently
+ * only AES), flag if key is wrapped with an old MKVP.
+ * @param seckey pointer to buffer with the input secure key
+ * @param pcardnr pointer to cardnr, receives the card number on success
+ * @param pdomain pointer to domain, receives the domain number on success
+ * @param pkeysize pointer to keysize, receives the bitsize of the key
+ * @param pattributes pointer to attributes, receives additional info
+ * PKEY_VERIFY_ATTR_AES if the key is an AES key
+ * PKEY_VERIFY_ATTR_OLD_MKVP if key has old mkvp stored in
+ * @return 0 on success, negative errno value on failure. If no card could
+ * be found which is able to handle this key, -ENODEV is returned.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+ u16 *pcardnr, u16 *pdomain,
+ u16 *pkeysize, u32 *pattributes);
+
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e4988710aa86..60d395fdc864 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -91,14 +91,15 @@ extern void execve_tail(void);
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
*/
-#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \
- (tsk)->mm->context.asce_limit : TASK_MAX_SIZE)
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+ (1UL << 31) : (1UL << 53))
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
-#define TASK_MAX_SIZE (1UL << 53)
+#define TASK_SIZE_MAX (1UL << 53)
-#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
+#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
+ (1UL << 31) : (1UL << 42))
#define STACK_TOP_MAX (1UL << 42)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -135,6 +136,8 @@ struct thread_struct {
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
+ struct gs_cb *gs_cb; /* Current guarded storage cb */
+ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +218,9 @@ void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
+/* Free guarded storage control block for current */
+void exit_thread_gs(void);
+
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a027f3..cd78155b1829 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,8 @@
#define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12)
#define MACHINE_FLAG_VX _BITUL(13)
-#define MACHINE_FLAG_CAD _BITUL(14)
-#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_NX _BITUL(14)
+#define MACHINE_FLAG_GS _BITUL(15)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -68,8 +68,8 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
-#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index 487428b6d099..334e279f1bce 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -2,6 +2,6 @@
#define _ASM_S390_SPARSEMEM_H
#define SECTION_SIZE_BITS 28
-#define MAX_PHYSMEM_BITS 46
+#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS
#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index ffc45048ea7d..f7838ecd83c6 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,7 @@
#define __ASM_SPINLOCK_H
#include <linux/smp.h>
+#include <asm/atomic_ops.h>
#include <asm/barrier.h>
#include <asm/processor.h>
@@ -17,12 +18,6 @@
extern int spin_retry;
-static inline int
-_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
-{
- return __sync_bool_compare_and_swap(lock, old, new);
-}
-
#ifndef CONFIG_SMP
static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
#else
@@ -40,7 +35,7 @@ bool arch_vcpu_is_preempted(int cpu);
* (the type definitions are in asm/spinlock_types.h)
*/
-void arch_lock_relax(unsigned int cpu);
+void arch_lock_relax(int cpu);
void arch_spin_lock_wait(arch_spinlock_t *);
int arch_spin_trylock_retry(arch_spinlock_t *);
@@ -70,7 +65,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
{
barrier();
return likely(arch_spin_value_unlocked(*lp) &&
- _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
+ __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
}
static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -95,7 +90,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
- typecheck(unsigned int, lp->lock);
+ typecheck(int, lp->lock);
asm volatile(
"st %1,%0\n"
: "+Q" (lp->lock)
@@ -141,16 +136,16 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
static inline int arch_read_trylock_once(arch_rwlock_t *rw)
{
- unsigned int old = ACCESS_ONCE(rw->lock);
- return likely((int) old >= 0 &&
- _raw_compare_and_swap(&rw->lock, old, old + 1));
+ int old = ACCESS_ONCE(rw->lock);
+ return likely(old >= 0 &&
+ __atomic_cmpxchg_bool(&rw->lock, old, old + 1));
}
static inline int arch_write_trylock_once(arch_rwlock_t *rw)
{
- unsigned int old = ACCESS_ONCE(rw->lock);
+ int old = ACCESS_ONCE(rw->lock);
return likely(old == 0 &&
- _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+ __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
}
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -161,9 +156,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
#define __RAW_LOCK(ptr, op_val, op_string) \
({ \
- unsigned int old_val; \
+ int old_val; \
\
- typecheck(unsigned int *, ptr); \
+ typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
"bcr 14,0\n" \
@@ -175,9 +170,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
#define __RAW_UNLOCK(ptr, op_val, op_string) \
({ \
- unsigned int old_val; \
+ int old_val; \
\
- typecheck(unsigned int *, ptr); \
+ typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
: "=d" (old_val), "+Q" (*ptr) \
@@ -187,14 +182,14 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
})
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
-extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
static inline void arch_read_lock(arch_rwlock_t *rw)
{
- unsigned int old;
+ int old;
old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
- if ((int) old < 0)
+ if (old < 0)
_raw_read_lock_wait(rw);
}
@@ -205,7 +200,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
static inline void arch_write_lock(arch_rwlock_t *rw)
{
- unsigned int old;
+ int old;
old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
if (old != 0)
@@ -232,11 +227,11 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
- unsigned int old;
+ int old;
do {
old = ACCESS_ONCE(rw->lock);
- } while (!_raw_compare_and_swap(&rw->lock, old, old - 1));
+ } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1));
}
static inline void arch_write_lock(arch_rwlock_t *rw)
@@ -248,7 +243,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
- typecheck(unsigned int, rw->lock);
+ typecheck(int, rw->lock);
rw->owner = 0;
asm volatile(
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index d84b6939237c..fe755eec275f 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,14 +6,14 @@
#endif
typedef struct {
- unsigned int lock;
+ int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
typedef struct {
- unsigned int lock;
- unsigned int owner;
+ int lock;
+ int owner;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..f6c2b5814ab0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
#include <linux/thread_info.h>
#include <asm/fpu/api.h>
#include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
extern struct task_struct *__switch_to(void *, void *);
extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
save_fpu_regs(); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
} \
if (next->mm) { \
update_cr_regs(next); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 229326c942c7..73bff45ced55 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -142,7 +142,15 @@ struct sysinfo_3_2_2 {
extern int topology_max_mnest;
-#define TOPOLOGY_CORE_BITS 64
+/*
+ * Returns the maximum nesting level supported by the cpu topology code.
+ * The current maximum level is 4 which is the drawer level.
+ */
+static inline int topology_mnest_limit(void)
+{
+ return min(topology_max_mnest, 4);
+}
+
#define TOPOLOGY_NR_MAG 6
struct topology_core {
@@ -152,7 +160,7 @@ struct topology_core {
unsigned char pp:2;
unsigned char reserved1;
unsigned short origin;
- unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG];
+ unsigned long mask;
};
struct topology_container {
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a445eb8..f36e6e2b73f0 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
-#define TIF_SECCOMP 5 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_UPROBE 7 /* breakpointed or single-stepping */
+#define TIF_UPROBE 3 /* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
+#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
+#define TIF_SECCOMP 10 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define _TIF_UPROBE _BITUL(TIF_UPROBE)
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5c1454..addb09cee0f5 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,6 +1,16 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += mman.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sockios.h
+generic-y += termbits.h
+
header-y += auxvec.h
header-y += bitsperlong.h
header-y += byteorder.h
@@ -11,25 +21,20 @@ header-y += cmb.h
header-y += dasd.h
header-y += debug.h
header-y += errno.h
-header-y += fcntl.h
+header-y += guarded_storage.h
header-y += hypfs.h
-header-y += ioctl.h
header-y += ioctls.h
header-y += ipcbuf.h
header-y += kvm.h
header-y += kvm_para.h
header-y += kvm_perf.h
header-y += kvm_virtio.h
-header-y += mman.h
header-y += monwriter.h
header-y += msgbuf.h
-header-y += param.h
header-y += pkey.h
-header-y += poll.h
header-y += posix_types.h
header-y += ptrace.h
header-y += qeth.h
-header-y += resource.h
header-y += schid.h
header-y += sclp_ctl.h
header-y += sembuf.h
@@ -40,12 +45,10 @@ header-y += sigcontext.h
header-y += siginfo.h
header-y += signal.h
header-y += socket.h
-header-y += sockios.h
header-y += stat.h
header-y += statfs.h
header-y += swab.h
header-y += tape390.h
-header-y += termbits.h
header-y += termios.h
header-y += types.h
header-y += ucontext.h
diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h
deleted file mode 100644
index 395e97d8005e..000000000000
--- a/arch/s390/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * S390 version
- *
- */
-
-#ifndef _S390_ERRNO_H
-#define _S390_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/s390/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000000..852850e8e17e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb { /* control block that load_gs_cb() and store_gs_cb() operate on; four 64-bit words */
+ __u64 reserved;
+ __u64 gsd; /* gs_enable() presets this to 25; presumably the guarded-storage designation (TODO confirm) */
+ __u64 gssm; /* presumably the guarded-storage section mask (TODO confirm) */
+ __u64 gs_epl_a; /* presumably the address of a struct gs_epl (TODO confirm) */
+};
+
+struct gs_epl { /* NOTE(review): presumably the event parameter list that gs_cb.gs_epl_a points to; confirm */
+ __u8 pad1;
+ union { /* gs_eam as a whole byte, overlaid with its individual bit-fields */
+ __u8 gs_eam;
+ struct {
+ __u8 : 6; /* unnamed: six bits without a field name */
+ __u8 e : 1;
+ __u8 b : 1;
+ };
+ };
+ union { /* gs_eci as a whole byte, overlaid with its individual bit-fields */
+ __u8 gs_eci;
+ struct {
+ __u8 tx : 1;
+ __u8 cx : 1;
+ __u8 : 5; /* unnamed: five bits without a field name */
+ __u8 in : 1;
+ };
+ };
+ union { /* gs_eai as a whole byte, overlaid with its individual bit-fields */
+ __u8 gs_eai;
+ struct {
+ __u8 : 1; /* unnamed: one bit without a field name */
+ __u8 t : 1;
+ __u8 as : 2;
+ __u8 ar : 4;
+ };
+ };
+ __u32 pad2; /* fills the first 64-bit word together with the four bytes above */
+ __u64 gs_eha;
+ __u64 gs_eia;
+ __u64 gs_eoa;
+ __u64 gs_eir;
+ __u64 gs_era;
+};
+
+#define GS_ENABLE 0 /* s390_guarded_storage command: allocate and load a control block if the task has none */
+#define GS_DISABLE 1 /* s390_guarded_storage command: free the task's control block and clear CR2 bit 4 */
+#define GS_SET_BC_CB 2 /* s390_guarded_storage command: copy a broadcast control block in from user space */
+#define GS_CLEAR_BC_CB 3 /* s390_guarded_storage command: free the task's broadcast control block */
+#define GS_BROADCAST 4 /* s390_guarded_storage command: flag sibling threads that have a broadcast control block */
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{ /*
+ * Issues the RXY-format instruction with opcode 0xe3..4d, with *gs_cb
+ * passed as a memory input operand. Presumably this loads the CPU's
+ * guarded-storage controls from the block (TODO confirm against the
+ * architecture documentation). gs_cb must not be NULL here;
+ * restore_gs_cb() is the NULL-tolerant wrapper.
+ */
+ asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+/*
+ * Store the guarded-storage controls into *gs_cb using the RXY-format
+ * instruction with opcode 0xe3..49.
+ *
+ * The instruction writes the control block, so *gs_cb is declared as an
+ * output operand ("=Q"). As an input-only operand the compiler would be
+ * free to assume the memory is left unchanged by the asm statement.
+ *
+ * gs_cb must not be NULL; save_gs_cb() is the NULL-tolerant wrapper.
+ */
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+	asm volatile(".insn rxy,0xe30000000049,0,%0" : "=Q" (*gs_cb));
+}
+
+/* NULL-tolerant wrapper around store_gs_cb(): a NULL gs_cb is a no-op. */
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+	if (!gs_cb)
+		return;
+	store_gs_cb(gs_cb);
+}
+
+/* NULL-tolerant wrapper around load_gs_cb(): a NULL gs_cb is a no-op. */
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+	if (!gs_cb)
+		return;
+	load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/s390/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h
deleted file mode 100644
index de23da1f41b2..000000000000
--- a/arch/s390/include/uapi/asm/mman.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/mman.h"
- */
-#include <asm-generic/mman.h>
diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h
deleted file mode 100644
index c616821bf2ac..000000000000
--- a/arch/s390/include/uapi/asm/param.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASMS390_PARAM_H
-#define _ASMS390_PARAM_H
-
-#include <asm-generic/param.h>
-
-#endif /* _ASMS390_PARAM_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index ed7f19c27ce5..e6c04faf8a6c 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -109,4 +109,23 @@ struct pkey_skey2pkey {
};
#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
+/*
+ * Verify that the given secure key can be used with the pkey module.
+ * Checks that the key type is correct and that at least one crypto card
+ * is able to handle this key (its master key or old master key
+ * verification pattern matches).
+ * Returns some info about the key: card and domain number, keysize in
+ * bits, keytype (currently only AES), and a flag if the key is wrapped
+ * with an old MKVP.
+ */
+struct pkey_verifykey {
+ struct pkey_seckey seckey; /* in: the secure key blob */
+ __u16 cardnr; /* out: card number */
+ __u16 domain; /* out: domain number */
+ __u16 keysize; /* out: key size in bits */
+ __u32 attributes; /* out: attribute bits */
+};
+#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
+#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */
+#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */
+
#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/s390/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h
deleted file mode 100644
index ec23d1c73c92..000000000000
--- a/arch/s390/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/resources.h"
- */
-
-#ifndef _S390_RESOURCE_H
-#define _S390_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif
-
diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h
deleted file mode 100644
index 6f60eee73242..000000000000
--- a/arch/s390/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_S390_SOCKIOS_H
-#define _ASM_S390_SOCKIOS_H
-
-#include <asm-generic/sockios.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h
deleted file mode 100644
index 71bf6ac6a2b9..000000000000
--- a/arch/s390/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_S390_TERMBITS_H
-#define _ASM_S390_TERMBITS_H
-
-#include <asm-generic/termbits.h>
-
-#endif
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 152de9b796e1..ea42290e7d51 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,7 @@
#define __NR_copy_file_range 375
#define __NR_preadv2 376
#define __NR_pwritev2 377
-/* Number 378 is reserved for guarded storage */
+#define __NR_s390_guarded_storage 378
#define __NR_statx 379
#define NR_syscalls 380
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce548fe8b..adb3fe2e3d42 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -51,14 +51,12 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
#
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-CFLAGS_sysinfo.o += -w
-
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y += runtime_instr.o cache.o fpu.o dumpstack.o
-obj-y += entry.o reipl.o relocate_kernel.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
+obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o
extra-y += head.o head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570ded5b..6bb29633e1f1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
- OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index e89cc2e71db1..986642a3543b 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index dd1d5c62c374..d628afc26708 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -429,6 +429,20 @@ static void *nt_vmcoreinfo(void *ptr)
}
/*
+ * Initialize final note (needed for /proc/vmcore code)
+ */
+static void *nt_final(void *ptr)
+{
+	Elf64_Nhdr *nhdr = ptr;
+
+	/* An all-zero note header: type 0, no descriptor, no name. */
+	nhdr->n_type = 0;
+	nhdr->n_descsz = 0;
+	nhdr->n_namesz = 0;
+	/* Advance the caller's position by the size of the header. */
+	return PTR_ADD(ptr, sizeof(*nhdr));
+}
+
+/*
* Initialize ELF header (new kernel)
*/
static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
@@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
if (sa->prefix != 0)
ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
ptr = nt_vmcoreinfo(ptr);
+ ptr = nt_final(ptr);
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = PT_NOTE;
phdr->p_offset = notes_offset;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79cc5f2..5d20182ee8ae 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -231,9 +231,29 @@ static noinline __init void detect_machine_type(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}
+/*
+ * Remove leading, trailing and double whitespace from @str, in place.
+ *
+ * @str must be a NUL-terminated, writable string. On return it starts
+ * with its first non-blank character, has no trailing whitespace, and
+ * every run of whitespace is reduced to its first character.
+ */
+static inline void strim_all(char *str)
+{
+	char *s;
+
+	s = strim(str);
+	/*
+	 * Shift the trimmed text to the front of the buffer. The "+ 1"
+	 * moves the NUL terminator as well; without it the tail of the
+	 * old, unshifted text would stay visible behind the copy.
+	 */
+	if (s != str)
+		memmove(str, s, strlen(s) + 1);
+	while (*str) {
+		/* Only the character following a whitespace needs a look. */
+		if (!isspace(*str++))
+			continue;
+		if (isspace(*str)) {
+			/* Collapse the run: pull the rest of the string up. */
+			s = skip_spaces(str);
+			memmove(str, s, strlen(s) + 1);
+		}
+	}
+}
+
static noinline __init void setup_arch_string(void)
{
struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+ struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
+ char mstr[80], hvstr[17];
if (stsi(mach, 1, 1, 1))
return;
@@ -241,14 +261,21 @@ static noinline __init void setup_arch_string(void)
EBCASC(mach->type, sizeof(mach->type));
EBCASC(mach->model, sizeof(mach->model));
EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
- dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)",
- mach->manufacturer,
- mach->type,
- mach->model,
- mach->model_capacity,
- MACHINE_IS_LPAR ? "LPAR" :
- MACHINE_IS_VM ? "z/VM" :
- MACHINE_IS_KVM ? "KVM" : "unknown");
+ sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
+ mach->manufacturer, mach->type,
+ mach->model, mach->model_capacity);
+ strim_all(mstr);
+ if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
+ EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
+ sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
+ strim_all(hvstr);
+ } else {
+ sprintf(hvstr, "%s",
+ MACHINE_IS_LPAR ? "LPAR" :
+ MACHINE_IS_VM ? "z/VM" :
+ MACHINE_IS_KVM ? "KVM" : "unknown");
+ }
+ dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}
static __init void setup_topology(void)
@@ -358,6 +385,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
+ if (test_facility(133))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
}
static inline void save_vector_registers(void)
@@ -375,7 +404,7 @@ static int __init topology_setup(char *str)
rc = kstrtobool(str, &enabled);
if (!rc && !enabled)
- S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY;
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
return rc;
}
early_param("topology", topology_setup);
@@ -405,23 +434,16 @@ early_param("noexec", noexec_setup);
static int __init cad_setup(char *str)
{
- int val;
-
- get_option(&str, &val);
- if (val && test_facility(128))
- S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
- return 0;
-}
-early_param("cad", cad_setup);
+ bool enabled;
+ int rc;
-static int __init cad_init(void)
-{
- if (MACHINE_HAS_CAD)
+ rc = kstrtobool(str, &enabled);
+ if (!rc && enabled && test_facility(128))
/* Enable problem state CAD. */
__ctl_set_bit(2, 3);
- return 0;
+ return rc;
}
-early_initcall(cad_init);
+early_param("cad", cad_setup);
static __init void memmove_early(void *dst, const void *src, size_t n)
{
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a7d737d514c..c6cf338c9327 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_UPROBE)
+ _TIF_UPROBE | _TIF_GUARDED_STORAGE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -189,8 +189,6 @@ ENTRY(__switch_to)
stg %r3,__LC_CURRENT # store task struct of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
- /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
- lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
@@ -332,6 +330,8 @@ ENTRY(system_call)
TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
#endif
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lsysc_guarded_storage
TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +409,14 @@ ENTRY(system_call)
#endif
#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lsysc_guarded_storage:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg gs_load_bc_cb
+
+#
# _PIF_PER_TRAP is set, call do_per_trap
#
.Lsysc_singlestep:
@@ -663,6 +671,8 @@ ENTRY(io_int_handler)
jo .Lio_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lio_guarded_storage
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +707,18 @@ ENTRY(io_int_handler)
jg load_fpu_regs
#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lio_guarded_storage:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,gs_load_bc_cb
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f901865326..dbf5f7e18246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
DECLARE_PER_CPU(u64, mt_cycles[8]);
void verify_facilities(void);
+void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..6f064745c3b1
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+/* Free both guarded-storage control blocks of the current task. */
+void exit_thread_gs(void)
+{
+	kfree(current->thread.gs_cb);
+	kfree(current->thread.gs_bc_cb);
+	/* Reset both pointers so the freed blocks are not referenced again. */
+	current->thread.gs_bc_cb = NULL;
+	current->thread.gs_cb = NULL;
+}
+
+/*
+ * GS_ENABLE: give the current task a guarded-storage control block.
+ *
+ * Nothing is done if the task already has one. Returns 0 on success or
+ * -ENOMEM if the control block cannot be allocated.
+ */
+static int gs_enable(void)
+{
+	struct gs_cb *cb;
+
+	if (current->thread.gs_cb)
+		return 0;
+
+	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return -ENOMEM;
+	cb->gsd = 25;
+
+	/*
+	 * Set bit 4 of control register 2, load the block and publish it in
+	 * the thread structure without being preempted in between.
+	 */
+	preempt_disable();
+	__ctl_set_bit(2, 4);
+	load_gs_cb(cb);
+	current->thread.gs_cb = cb;
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * GS_DISABLE: drop the current task's guarded-storage control block and
+ * clear bit 4 of control register 2. Always returns 0; a task without a
+ * control block is left alone.
+ */
+static int gs_disable(void)
+{
+	if (!current->thread.gs_cb)
+		return 0;
+
+	preempt_disable();
+	kfree(current->thread.gs_cb);
+	current->thread.gs_cb = NULL;
+	__ctl_clear_bit(2, 4);
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * GS_SET_BC_CB: copy a control block from user space into the current
+ * task's broadcast control block, allocating that block on first use.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or -EFAULT if
+ * the user buffer cannot be read. A block allocated here stays attached
+ * to the task even when the copy fails.
+ */
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+	struct gs_cb *bc_cb = current->thread.gs_bc_cb;
+
+	if (!bc_cb) {
+		bc_cb = kzalloc(sizeof(*bc_cb), GFP_KERNEL);
+		if (!bc_cb)
+			return -ENOMEM;
+		current->thread.gs_bc_cb = bc_cb;
+	}
+	return copy_from_user(bc_cb, u_gs_cb, sizeof(*bc_cb)) ? -EFAULT : 0;
+}
+
+/* GS_CLEAR_BC_CB: detach and free the broadcast control block. Returns 0. */
+static int gs_clear_bc_cb(void)
+{
+	struct gs_cb *old = current->thread.gs_bc_cb;
+
+	/* Unhook the block from the task before it is freed. */
+	current->thread.gs_bc_cb = NULL;
+	kfree(old);
+	return 0;
+}
+
+/*
+ * Called from the entry code when TIF_GUARDED_STORAGE is set: clear the
+ * flag and, if a broadcast control block is pending, make it the task's
+ * active control block. The previously active block is freed.
+ *
+ * @regs is not used by this function.
+ */
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+	struct gs_cb *bc_cb;
+
+	preempt_disable();
+	clear_thread_flag(TIF_GUARDED_STORAGE);
+	bc_cb = current->thread.gs_bc_cb;
+	if (!bc_cb)
+		goto out;
+
+	/* The broadcast block replaces the active one and is consumed. */
+	kfree(current->thread.gs_cb);
+	current->thread.gs_bc_cb = NULL;
+	__ctl_set_bit(2, 4);
+	load_gs_cb(bc_cb);
+	current->thread.gs_cb = bc_cb;
+out:
+	preempt_enable();
+}
+
+/*
+ * GS_BROADCAST: walk the threads of the current task's thread group under
+ * tasklist_lock and set TIF_GUARDED_STORAGE on every thread that has a
+ * broadcast control block. Always returns 0.
+ *
+ * NOTE(review): kick_process() runs only when
+ * test_and_set_tsk_thread_flag() returns nonzero, presumably meaning the
+ * flag was already set; confirm that this polarity is intended.
+ */
+static int gs_broadcast(void)
+{
+	struct task_struct *thr;
+
+	read_lock(&tasklist_lock);
+	for_each_thread(current, thr) {
+		/* Threads without a broadcast block are not flagged at all. */
+		if (thr->thread.gs_bc_cb &&
+		    test_and_set_tsk_thread_flag(thr, TIF_GUARDED_STORAGE))
+			kick_process(thr);
+	}
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+/*
+ * s390_guarded_storage system call: dispatch one of the GS_* commands.
+ *
+ * @gs_cb is only used by GS_SET_BC_CB. Returns the handler's result,
+ * -EOPNOTSUPP if the machine has no guarded-storage support, or -EINVAL
+ * for an unknown command.
+ */
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+		struct gs_cb __user *, gs_cb)
+{
+	int rc;
+
+	if (!MACHINE_HAS_GS)
+		return -EOPNOTSUPP;
+
+	switch (command) {
+	case GS_ENABLE:
+		rc = gs_enable();
+		break;
+	case GS_DISABLE:
+		rc = gs_disable();
+		break;
+	case GS_SET_BC_CB:
+		rc = gs_set_bc_cb(gs_cb);
+		break;
+	case GS_CLEAR_BC_CB:
+		rc = gs_clear_bc_cb();
+		break;
+	case GS_BROADCAST:
+		rc = gs_broadcast();
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	return rc;
+}
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 0b5ebf8a3d30..eff5b31671d4 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -25,7 +25,6 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
-#include <asm/facility.h>
#include <asm/page.h>
#include <asm/ptrace.h>
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 482d3526e32b..31c91f24e562 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -52,7 +52,7 @@ ENTRY(startup_continue)
.quad 0 # cr1: primary space segment table
.quad .Lduct # cr2: dispatchable unit control table
.quad 0 # cr3: instruction authorization
- .quad 0 # cr4: instruction authorization
+ .quad 0xffff # cr4: instruction authorization
.quad .Lduct # cr5: primary-aste origin
.quad 0 # cr6: I/O interrupts
.quad 0 # cr7: secondary space segment table
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 000000000000..ee85e17dd79d
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,15 @@
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+/*
+ * Create the "s390" directory at the debugfs root and publish it through
+ * arch_debugfs_dir. If the directory cannot be created the pointer is
+ * left NULL. Always returns 0.
+ */
+static int __init arch_kdebugfs_init(void)
+{
+	struct dentry *dir = debugfs_create_dir("s390", NULL);
+
+	arch_debugfs_dir = IS_ERR(dir) ? NULL : dir;
+	return 0;
+}
+postcore_initcall(arch_kdebugfs_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d83829..db5658daf994 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
#include <asm/cacheflush.h>
#include <asm/os_info.h>
#include <asm/switch_to.h>
+#include <asm/nmi.h>
typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
*/
static noinline void __machine_kdump(void *image)
{
+ struct mcesa *mcesa;
+ unsigned long cr2_old, cr2_new;
int this_cpu, cpu;
lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (MACHINE_HAS_VX)
- save_vx_regs((void *) &S390_lowcore.vector_save_area);
+ save_vx_regs((__vector128 *) mcesa->vector_save_area);
+ if (MACHINE_HAS_GS) {
+ __ctl_store(cr2_old, 2, 2);
+ cr2_new = cr2_old | (1UL << 4);
+ __ctl_load(cr2_new, 2, 2);
+ save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+ __ctl_load(cr2_old, 2, 2);
+ }
/*
* To create a good backchain for this CPU in the dump store_status
* is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327154ee..985589523970 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
int kill_task;
u64 zero;
void *fpt_save_area;
+ struct mcesa *mcesa;
kill_task = 0;
zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
: : "Q" (S390_lowcore.fpt_creg_save_area));
}
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (!MACHINE_HAS_VX) {
/* Validate floating point registers */
asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
" la 1,%0\n"
" .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- : : "Q" (*(struct vx_array *)
- &S390_lowcore.vector_save_area) : "1");
+ : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
+ : "1");
__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
/* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
*/
kill_task = 1;
}
+ /* Validate guarded storage registers */
+ if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
+ if (!mci.gs)
+ /*
+ * The guarded storage registers can't be restored and
+ * the current process uses guarded storage.
+ * It has to be terminated.
+ */
+ kill_task = 1;
+ else
+ load_gs_cb((struct gs_cb *)
+ mcesa->guarded_storage_save_area);
+ }
/*
* We don't even try to validate the TOD register, since we simply
* can't write something sensible into that register.
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1aba10e90906..746d03423333 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,7 +1,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2017
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -22,19 +22,12 @@
#include <asm/irq.h>
#include <asm/cpu_mf.h>
-/* CPU-measurement counter facility supports these CPU counter sets:
- * For CPU counter sets:
- * Basic counter set: 0-31
- * Problem-state counter set: 32-63
- * Crypto-activity counter set: 64-127
- * Extented counter set: 128-159
- */
enum cpumf_ctr_set {
- /* CPU counter sets */
- CPUMF_CTR_SET_BASIC = 0,
- CPUMF_CTR_SET_USER = 1,
- CPUMF_CTR_SET_CRYPTO = 2,
- CPUMF_CTR_SET_EXT = 3,
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
/* Maximum number of counter sets */
CPUMF_CTR_SET_MAX,
@@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
[CPUMF_CTR_SET_USER] = 0x04,
[CPUMF_CTR_SET_CRYPTO] = 0x08,
[CPUMF_CTR_SET_EXT] = 0x01,
+ [CPUMF_CTR_SET_MT_DIAG] = 0x20,
};
static void ctr_set_enable(u64 *state, int ctr_set)
@@ -76,19 +70,20 @@ struct cpu_hw_events {
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.ctr_set = {
- [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
},
.state = 0,
.flags = 0,
.txn_flags = 0,
};
-static int get_counter_set(u64 event)
+static enum cpumf_ctr_set get_counter_set(u64 event)
{
- int set = -1;
+ int set = CPUMF_CTR_SET_MAX;
if (event < 32)
set = CPUMF_CTR_SET_BASIC;
@@ -98,34 +93,17 @@ static int get_counter_set(u64 event)
set = CPUMF_CTR_SET_CRYPTO;
else if (event < 256)
set = CPUMF_CTR_SET_EXT;
+ else if (event >= 448 && event < 496)
+ set = CPUMF_CTR_SET_MT_DIAG;
return set;
}
-static int validate_event(const struct hw_perf_event *hwc)
-{
- switch (hwc->config_base) {
- case CPUMF_CTR_SET_BASIC:
- case CPUMF_CTR_SET_USER:
- case CPUMF_CTR_SET_CRYPTO:
- case CPUMF_CTR_SET_EXT:
- /* check for reserved counters */
- if ((hwc->config >= 6 && hwc->config <= 31) ||
- (hwc->config >= 38 && hwc->config <= 63) ||
- (hwc->config >= 80 && hwc->config <= 127))
- return -EOPNOTSUPP;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
static int validate_ctr_version(const struct hw_perf_event *hwc)
{
struct cpu_hw_events *cpuhw;
int err = 0;
+ u16 mtdiag_ctl;
cpuhw = &get_cpu_var(cpu_hw_events);
@@ -145,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
(cpuhw->info.csvn > 2 && hwc->config > 255))
err = -EOPNOTSUPP;
break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (cpuhw->info.csvn <= 3)
+ err = -EOPNOTSUPP;
+ /*
+ * MT-diagnostic counters are read-only. The counter set
+ * is automatically enabled and activated on all CPUs with
+ * multithreading (SMT). Deactivation of multithreading
+ * also disables the counter set. State changes are ignored
+ * by lcctl(). Because Linux controls SMT enablement through
+ * a kernel parameter only, the counter set is either disabled
+ * or enabled and active.
+ *
+ * Thus, the counters can only be used if SMT is on and the
+ * counter set is enabled and active.
+ */
+ mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG];
+ if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
+ (cpuhw->info.enable_ctl & mtdiag_ctl) &&
+ (cpuhw->info.act_ctl & mtdiag_ctl)))
+ err = -EOPNOTSUPP;
+ break;
}
put_cpu_var(cpu_hw_events);
@@ -250,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* loss of counter data alert */
if (alert & CPU_MF_INT_CF_LCDA)
pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
}
#define PMC_INIT 0
@@ -330,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
+ enum cpumf_ctr_set set;
int err;
u64 ev;
@@ -370,25 +375,30 @@ static int __hw_perf_event_init(struct perf_event *event)
if (ev == -1)
return -ENOENT;
- if (ev >= PERF_CPUM_CF_MAX_CTR)
+ if (ev > PERF_CPUM_CF_MAX_CTR)
return -EINVAL;
- /* Use the hardware perf event structure to store the counter number
- * in 'config' member and the counter set to which the counter belongs
- * in the 'config_base'. The counter set (config_base) is then used
- * to enable/disable the counters.
- */
- hwc->config = ev;
- hwc->config_base = get_counter_set(ev);
-
- /* Validate the counter that is assigned to this event.
- * Because the counter facility can use numerous counters at the
- * same time without constraints, it is not necessary to explicitly
- * validate event groups (event->group_leader != event).
- */
- err = validate_event(hwc);
- if (err)
- return err;
+ /* Obtain the counter set to which the specified counter belongs */
+ set = get_counter_set(ev);
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ case CPUMF_CTR_SET_MT_DIAG:
+ /*
+ * Use the hardware perf event structure to store the
+ * counter number in the 'config' member and the counter
+ * set number in the 'config_base'. The counter set number
+ * is then later used to enable/disable the counter(s).
+ */
+ hwc->config = ev;
+ hwc->config_base = set;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ /* The counter could not be associated to a counter set */
+ return -EINVAL;
+ };
/* Initialize for using the CPU-measurement counter facility */
if (!atomic_inc_not_zero(&num_events)) {
@@ -452,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event)
return err;
}
-static int hw_perf_event_update(struct perf_event *event)
+static void hw_perf_event_update(struct perf_event *event)
{
u64 prev, new, delta;
int err;
@@ -461,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event)
prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err)
- goto out;
+ return;
} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
delta = (prev <= new) ? new - prev
: (-1ULL - prev) + new + 1; /* overflow */
local64_add(delta, &event->count);
-out:
- return err;
}
static void cpumf_pmu_read(struct perf_event *event)
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index c343ac2cf6c5..d3133285b7d1 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-static struct attribute *cpumcf_pmu_event_attr[] = {
+static struct attribute *cpumcf_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf, CPU_CYCLES),
CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
@@ -236,28 +292,87 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { /* cf_z13 counter events; chosen as the model list for machine types 0x2964 and 0x2965 */
+ CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL, /* NULL entry closes the list */
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
-static struct attribute_group cpumsf_pmu_events_group = {
+static struct attribute_group cpumcf_pmu_events_group = {
.name = "events",
- .attrs = cpumcf_pmu_event_attr,
};
PMU_FORMAT_ATTR(event, "config:0-63");
-static struct attribute *cpumsf_pmu_format_attr[] = {
+static struct attribute *cpumcf_pmu_format_attr[] = {
&format_attr_event.attr,
NULL,
};
-static struct attribute_group cpumsf_pmu_format_group = {
+static struct attribute_group cpumcf_pmu_format_group = {
.name = "format",
- .attrs = cpumsf_pmu_format_attr,
+ .attrs = cpumcf_pmu_format_attr,
};
-static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
- &cpumsf_pmu_events_group,
- &cpumsf_pmu_format_group,
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+ &cpumcf_pmu_events_group,
+ &cpumcf_pmu_format_group,
NULL,
};
@@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a,
__init const struct attribute_group **cpumf_cf_event_group(void)
{
struct attribute **combined, **model;
+ struct attribute *none[] = { NULL };
struct cpuid cpu_id;
get_cpu_id(&cpu_id);
@@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x2828:
model = cpumcf_zec12_pmu_event_attr;
break;
+ case 0x2964:
+ case 0x2965:
+ model = cpumcf_z13_pmu_event_attr;
+ break;
default:
- model = NULL;
+ model = none;
break;
}
- if (!model)
- goto out;
-
combined = merge_attr(cpumcf_pmu_event_attr, model);
if (combined)
- cpumsf_pmu_events_group.attrs = combined;
-out:
- return cpumsf_pmu_attr_groups;
+ cpumcf_pmu_events_group.attrs = combined;
+ return cpumcf_pmu_attr_groups;
}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1c0b58545c04..9a4f279d25ca 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
* sample. Some early samples or samples from guests without
* lpp usage would be misaccounted to the host. We use the asn
* value as an addon heuristic to detect most of these guest samples.
- * If the value differs from the host hpp value, we assume to be a
- * KVM guest.
+ * If the value differs from 0xffff (the host value), we assume the
+ * sample belongs to a KVM guest.
*/
switch (sfr->basic.CL) {
case 1: /* logical partition */
@@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
sde_regs->in_guest = 1;
break;
default: /* old machine, use heuristics */
- if (sfr->basic.gpp ||
- sfr->basic.prim_asn != (u16)sfr->basic.hpp)
+ if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff)
sde_regs->in_guest = 1;
break;
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f29e41c5e2ec..999d7154bbdc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
*/
void exit_thread(struct task_struct *tsk)
{
- if (tsk == current)
+ if (tsk == current) {
exit_thread_runtime_instr();
+ exit_thread_gs();
+ }
}
void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
/* Don't copy runtime instrumentation info */
p->thread.ri_cb = NULL;
frame->childregs.psw.mask &= ~PSW_MASK_RI;
+ /* Don't copy guarded storage control block */
+ p->thread.gs_cb = NULL;
+ p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929a6261..778cd6536175 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/cpufeature.h>
+#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/init.h>
@@ -91,11 +92,23 @@ int cpu_have_feature(unsigned int num)
}
EXPORT_SYMBOL(cpu_have_feature);
+static void show_facilities(struct seq_file *m)
+{
+ unsigned int bit;
+ long *facilities;
+ /* Emit one "facilities :" line listing the number of every bit set in the lowcore STFLE facility list. */
+ facilities = (long *)&S390_lowcore.stfle_fac_list;
+ seq_puts(m, "facilities :");
+ for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT)
+ seq_printf(m, " %u", bit); /* bit is unsigned int, so print it with %u */
+ seq_putc(m, '\n');
+}
+
static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
};
static const char * const int_hwcap_str[] = {
"sie"
@@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
seq_printf(m, "%s ", int_hwcap_str[i]);
seq_puts(m, "\n");
+ show_facilities(m);
show_cacheinfo(m);
for_each_online_cpu(cpu) {
struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a1ec3c..488c5bb8dc77 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
struct per_regs old, new;
-
+ unsigned long cr0_old, cr0_new;
+ unsigned long cr2_old, cr2_new;
+ int cr0_changed, cr2_changed;
+
+ __ctl_store(cr0_old, 0, 0);
+ __ctl_store(cr2_old, 2, 2);
+ cr0_new = cr0_old;
+ cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr, cr_new;
-
- __ctl_store(cr, 0, 0);
/* Set or clear transaction execution TXC bit 8. */
- cr_new = cr | (1UL << 55);
+ cr0_new |= (1UL << 55);
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
- if (cr_new != cr)
- __ctl_load(cr_new, 0, 0);
+ cr0_new &= ~(1UL << 55);
/* Set or clear transaction execution TDC bits 62 and 63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
+ cr2_new &= ~3UL;
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
+ cr2_new |= 1UL;
else
- cr_new |= 2UL;
+ cr2_new |= 2UL;
}
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
}
+ /* Take care of enable/disable of guarded storage. */
+ if (MACHINE_HAS_GS) {
+ cr2_new &= ~(1UL << 4);
+ if (task->thread.gs_cb)
+ cr2_new |= (1UL << 4);
+ }
+ /* Load control register 0/2 iff changed */
+ cr0_changed = cr0_new != cr0_old;
+ cr2_changed = cr2_new != cr2_old;
+ if (cr0_changed)
+ __ctl_load(cr0_new, 0, 0);
+ if (cr2_changed)
+ __ctl_load(cr2_new, 2, 2);
/* Copy user specified PER registers */
new.control = thread->per_user.control;
new.start = thread->per_user.start;
@@ -1137,6 +1149,74 @@ static int s390_system_call_set(struct task_struct *target,
data, 0, sizeof(unsigned int));
}
+static int s390_gs_cb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+ /* Regset getter for thread.gs_cb: -ENODEV without MACHINE_HAS_GS, -ENODATA if the task has no control block. */
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, /* copies out one struct gs_cb starting at offset 0 */
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+ /* Regset setter for thread.gs_cb: -ENODEV without MACHINE_HAS_GS; allocates a zeroed control block on first use. */
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ target->thread.gs_cb = data; /* NOTE(review): stays attached to the task even if the copy-in below fails */
+ }
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_bc_cb;
+ /* Regset getter for thread.gs_bc_cb: -ENODEV without MACHINE_HAS_GS, -ENODATA if the task has no such block. */
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, /* copies out one struct gs_cb starting at offset 0 */
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_bc_cb;
+ /* Regset setter for thread.gs_bc_cb: -ENODEV without MACHINE_HAS_GS; allocates a zeroed block on first use. */
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ target->thread.gs_bc_cb = data; /* NOTE(review): stays attached to the task even if the copy-in below fails */
+ }
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
static const struct user_regset s390_regsets[] = {
{
.core_note_type = NT_PRSTATUS,
@@ -1194,6 +1274,22 @@ static const struct user_regset s390_regsets[] = {
.get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
+ {
+ .core_note_type = NT_S390_GS_BC,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_bc_get,
+ .set = s390_gs_bc_set,
+ },
};
static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1518,14 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
};
static const struct user_regset_view user_s390_compat_view = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b49be0..3ae756c0db3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
MAX_FACILITY_BIT/8);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ unsigned long bits, size;
+
+ bits = MACHINE_HAS_GS ? 11 : 10;
+ size = 1UL << bits;
+ lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+ if (MACHINE_HAS_GS)
+ lc->mcesad |= bits;
+ }
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_S390_VXRS_BCD;
}
+ /*
+ * Guarded storage support HWCAP_S390_GS is bit 14.
+ */
+ if (MACHINE_HAS_GS)
+ elf_hwcap |= HWCAP_S390_GS;
+
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5dab859b0d54..363000a77ffc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
+#include <asm/nmi.h>
#include "entry.h"
enum {
@@ -78,6 +79,8 @@ struct pcpu {
static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
+static struct kmem_cache *pcpu_mcesa_cache;
+
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, panic_stack;
+ unsigned long mcesa_origin, mcesa_bits;
struct lowcore *lc;
+ mcesa_origin = mcesa_bits = 0;
if (pcpu != &pcpu_devices[0]) {
pcpu->lowcore = (struct lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
panic_stack = __get_free_page(GFP_KERNEL);
if (!pcpu->lowcore || !panic_stack || !async_stack)
goto out;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = (unsigned long)
+ kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+ if (!mcesa_origin)
+ goto out;
+ mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+ }
} else {
async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
}
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->mcesad = mcesa_origin | mcesa_bits;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
if (vdso_alloc_per_cpu(lc))
goto out;
lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
return 0;
out:
if (pcpu != &pcpu_devices[0]) {
+ if (mcesa_origin)
+ kmem_cache_free(pcpu_mcesa_cache,
+ (void *) mcesa_origin);
free_page(panic_stack);
free_pages(async_stack, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
+ unsigned long mcesa_origin;
+
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL;
vdso_free_per_cpu(pcpu->lowcore);
if (pcpu == &pcpu_devices[0])
return;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+ }
free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!MACHINE_HAS_VX)
+ if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
return 0;
- pa = __pa(pcpu->lowcore->vector_save_area_addr);
+ pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+ if (MACHINE_HAS_GS)
+ pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ unsigned long size;
+
/* request the 0x1201 emergency signal external interrupt */
if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
+ /* create slab cache for the machine-check-extended-save-areas */
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+ pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+ size, size, 0, NULL);
+ if (!pcpu_mcesa_cache)
+ panic("Couldn't create nmi save area cache");
+ }
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2659b5cfeddb..54fce7b065de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
SYSCALL(sys_preadv2,compat_sys_preadv2)
SYSCALL(sys_pwritev2,compat_sys_pwritev2)
-NI_SYSCALL
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 12b6b138e354..eefcb54872a5 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -4,6 +4,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
*/
+#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
@@ -13,6 +14,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <asm/ebcdic.h>
+#include <asm/debug.h>
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
@@ -485,3 +487,99 @@ void calibrate_delay(void)
"%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2) /* generates an open handler and a file_operations object for the STSI query fc/s1/s2 */ \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{ \
+ file->private_data = (void *) get_zeroed_page(GFP_KERNEL); /* result buffer, one zeroed page per open */ \
+ if (!file->private_data) \
+ return -ENOMEM; \
+ if (stsi(file->private_data, fc, s1, s2)) { /* non-zero stsi() result: drop the page and fail with -EACCES */ \
+ free_page((unsigned long)file->private_data); \
+ file->private_data = NULL; \
+ return -EACCES; \
+ } \
+ return nonseekable_open(inode, file); \
+} \
+ /* file_operations combining the handler above with the shared stsi_release/stsi_read helpers */ \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
+ .open = stsi_open_##fc##_##s1##_##s2, \
+ .release = stsi_release, \
+ .read = stsi_read, \
+ .llseek = no_llseek, \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{ /* release handler used by every STSI_FILE file_operations; always returns 0 */
+ free_page((unsigned long)file->private_data); /* page stored in private_data by the STSI_FILE open handler */
+ return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{ /* read handler: serves the PAGE_SIZE buffer held in file->private_data */
+ return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
+
+struct stsi_file { /* one debugfs entry created by stsi_init_debugfs() */
+ const struct file_operations *fops; /* operations passed to debugfs_create_file() */
+ char *name; /* file name inside the "stsi" directory */
+};
+
+static struct stsi_file stsi_file[] __initdata = { /* pairs each STSI_FILE-generated fops object with its file name */
+ {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"},
+ {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"},
+ {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"},
+ {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"},
+ {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"},
+ {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"},
+ {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+ {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+ {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+ {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+ {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+ struct dentry *stsi_root;
+ struct stsi_file *sf;
+ int lvl, i;
+ /* Populate <arch_debugfs_dir>/stsi with "0_0_0" plus one read-only file per stsi_file[] entry; always returns 0. */
+ stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+ if (IS_ERR_OR_NULL(stsi_root))
+ return 0;
+ lvl = stsi(NULL, 0, 0, 0);
+ if (lvl > 0)
+ stsi_0_0_0 = lvl; /* only a positive stsi(NULL, 0, 0, 0) result is published; otherwise the value stays 0 */
+ debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+ for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+ sf = &stsi_file[i];
+ debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+ }
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+ char link_to[10];
+ /* Bounded formatting: "15_1_" plus an arbitrary int does not fit 10 bytes in general. */
+ snprintf(link_to, sizeof(link_to), "15_1_%d", topology_mnest_limit());
+ debugfs_create_symlink("topology", stsi_root, link_to);
+ }
+ return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 17660e800e74..bb47c92476f0 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
return mask;
}
+#define TOPOLOGY_CORE_BITS 64
+
static void add_cpus_to_mask(struct topology_core *tl_core,
struct mask_info *drawer,
struct mask_info *book,
@@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
struct cpu_topology_s390 *topo;
unsigned int core;
- for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+ for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
unsigned int rcore;
int lcpu, i;
@@ -244,7 +246,7 @@ static void update_cpu_masks(void)
void store_topology(struct sysinfo_15_1_x *info)
{
- stsi(info, 15, 1, min(topology_max_mnest, 4));
+ stsi(info, 15, 1, topology_mnest_limit());
}
static int __arch_update_cpu_topology(void)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f14199734..169558dc7daf 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
save_access_regs(vcpu->run->s.regs.acrs);
/* Extended save area */
- rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
- sizeof(unsigned long));
+ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+ sizeof(unsigned long));
/* Only bits 0-53 are used for address formation */
ext_sa_addr &= ~0x3ffUL;
if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05bb6a7..d5c5c911821a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -273,7 +273,7 @@ static void kvm_s390_cpu_feat_init(void)
kvm_s390_available_subfunc.pcc);
}
if (test_facility(57)) /* MSA5 */
- __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+ __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
kvm_s390_available_subfunc.ppno);
if (MACHINE_HAS_ESOP)
@@ -1512,9 +1512,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
} else {
if (sclp.hamax == U64_MAX)
- kvm->arch.mem_limit = TASK_MAX_SIZE;
+ kvm->arch.mem_limit = TASK_SIZE_MAX;
else
- kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
+ kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
sclp.hamax + 1);
kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
if (!kvm->arch.gmap)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ba427eb6f14c..ffb15bd4c593 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -17,7 +17,7 @@ int spin_retry = -1;
static int __init spin_retry_init(void)
{
if (spin_retry < 0)
- spin_retry = MACHINE_HAS_CAD ? 10 : 1000;
+ spin_retry = 1000;
return 0;
}
early_initcall(spin_retry_init);
@@ -32,23 +32,17 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
-static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old)
-{
- asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock));
-}
-
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
- unsigned int cpu = SPINLOCK_LOCKVAL;
- unsigned int owner;
- int count, first_diag;
+ int cpu = SPINLOCK_LOCKVAL;
+ int owner, count, first_diag;
first_diag = 1;
while (1) {
owner = ACCESS_ONCE(lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
- if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
return;
continue;
}
@@ -61,8 +55,6 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
/* Loop for a while on the lock value. */
count = spin_retry;
do {
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&lp->lock, owner);
owner = ACCESS_ONCE(lp->lock);
} while (owner && count-- > 0);
if (!owner)
@@ -82,9 +74,8 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
- unsigned int cpu = SPINLOCK_LOCKVAL;
- unsigned int owner;
- int count, first_diag;
+ int cpu = SPINLOCK_LOCKVAL;
+ int owner, count, first_diag;
local_irq_restore(flags);
first_diag = 1;
@@ -93,7 +84,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
/* Try to get the lock if it is free. */
if (!owner) {
local_irq_disable();
- if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
return;
local_irq_restore(flags);
continue;
@@ -107,8 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
/* Loop for a while on the lock value. */
count = spin_retry;
do {
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&lp->lock, owner);
owner = ACCESS_ONCE(lp->lock);
} while (owner && count-- > 0);
if (!owner)
@@ -128,18 +117,16 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags);
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
- unsigned int cpu = SPINLOCK_LOCKVAL;
- unsigned int owner;
- int count;
+ int cpu = SPINLOCK_LOCKVAL;
+ int owner, count;
for (count = spin_retry; count > 0; count--) {
owner = READ_ONCE(lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
- if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
return 1;
- } else if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&lp->lock, owner);
+ }
}
return 0;
}
@@ -147,8 +134,8 @@ EXPORT_SYMBOL(arch_spin_trylock_retry);
void _raw_read_lock_wait(arch_rwlock_t *rw)
{
- unsigned int owner, old;
int count = spin_retry;
+ int owner, old;
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
@@ -162,12 +149,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
- if ((int) old < 0) {
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&rw->lock, old);
+ if (old < 0)
continue;
- }
- if (_raw_compare_and_swap(&rw->lock, old, old + 1))
+ if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
return;
}
}
@@ -175,17 +159,14 @@ EXPORT_SYMBOL(_raw_read_lock_wait);
int _raw_read_trylock_retry(arch_rwlock_t *rw)
{
- unsigned int old;
int count = spin_retry;
+ int old;
while (count-- > 0) {
old = ACCESS_ONCE(rw->lock);
- if ((int) old < 0) {
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&rw->lock, old);
+ if (old < 0)
continue;
- }
- if (_raw_compare_and_swap(&rw->lock, old, old + 1))
+ if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
return 1;
}
return 0;
@@ -194,10 +175,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry);
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
+void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
{
- unsigned int owner, old;
int count = spin_retry;
+ int owner, old;
owner = 0;
while (1) {
@@ -209,14 +190,12 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
smp_mb();
- if ((int) old >= 0) {
+ if (old >= 0) {
prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
old = prev;
}
- if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+ if ((old & 0x7fffffff) == 0 && prev >= 0)
break;
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&rw->lock, old);
}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -225,8 +204,8 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
void _raw_write_lock_wait(arch_rwlock_t *rw)
{
- unsigned int owner, old, prev;
int count = spin_retry;
+ int owner, old, prev;
prev = 0x80000000;
owner = 0;
@@ -238,15 +217,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
- if ((int) old >= 0 &&
- _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
+ if (old >= 0 &&
+ __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
prev = old;
else
smp_mb();
- if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+ if ((old & 0x7fffffff) == 0 && prev >= 0)
break;
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&rw->lock, old);
}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -255,24 +232,21 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
- unsigned int old;
int count = spin_retry;
+ int old;
while (count-- > 0) {
old = ACCESS_ONCE(rw->lock);
- if (old) {
- if (MACHINE_HAS_CAD)
- _raw_compare_and_delay(&rw->lock, old);
+ if (old)
continue;
- }
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
+ if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
return 1;
}
return 0;
}
EXPORT_SYMBOL(_raw_write_trylock_retry);
-void arch_lock_relax(unsigned int cpu)
+void arch_lock_relax(int cpu)
{
if (!cpu)
return;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..7f6db1e6c048 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
if (len == 0 || from + len < from || to + len < to ||
- from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+ from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
return -EINVAL;
flush = 0;
@@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long offset, pte_t *pte)
+ unsigned long gaddr, pte_t *pte)
{
struct gmap_rmap *rmap, *rnext, *head;
- unsigned long gaddr, start, end, bits, raddr;
- unsigned long *table;
+ unsigned long start, end, bits, raddr;
BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->parent->guest_table_lock);
- table = radix_tree_lookup(&sg->parent->host_to_guest,
- vmaddr >> PMD_SHIFT);
- gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
- spin_unlock(&sg->parent->guest_table_lock);
- if (!table)
- return;
spin_lock(&sg->guest_table_lock);
if (sg->removed) {
@@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
- unsigned long offset, gaddr;
+ unsigned long offset, gaddr = 0;
unsigned long *table;
struct gmap *gmap, *sg, *next;
@@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
offset = offset * (4096 / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
- if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
- spin_lock(&gmap->shadow_lock);
- list_for_each_entry_safe(sg, next,
- &gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, offset, pte);
- spin_unlock(&gmap->shadow_lock);
- }
- if (!(bits & PGSTE_IN_BIT))
- continue;
spin_lock(&gmap->guest_table_lock);
table = radix_tree_lookup(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT);
if (table)
gaddr = __gmap_segment_gaddr(table) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (table)
+ if (!table)
+ continue;
+
+ if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+ spin_lock(&gmap->shadow_lock);
+ list_for_each_entry_safe(sg, next,
+ &gmap->children, list)
+ gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ spin_unlock(&gmap->shadow_lock);
+ }
+ if (bits & PGSTE_IN_BIT)
gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
}
rcu_read_unlock();
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..b7b779c40a5b 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if ((end <= start) || (end > TASK_SIZE))
+ if ((end <= start) || (end > mm->context.asce_limit))
return 0;
/*
* local_irq_save() doesn't prevent pagetable teardown, but does
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..b017daed6887 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
+ int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
- return addr;
+ goto check_asce_limit;
}
info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
else
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
- return vm_unmapped_area(&info);
+ addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
+ rc = crst_table_upgrade(mm);
+ if (rc)
+ return (unsigned long) rc;
+ }
+
+ return addr;
}
unsigned long
@@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
- return addr;
+ goto check_asce_limit;
/* requesting a specific address */
if (addr) {
@@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
- return addr;
+ goto check_asce_limit;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
}
- return addr;
-}
-
-int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
-{
- if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
- return 0;
- if (!(flags & MAP_FIXED))
- addr = 0;
- if ((addr + len) >= TASK_SIZE)
- return crst_table_upgrade(current->mm);
- return 0;
-}
-
-static unsigned long
-s390_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
-
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit) {
rc = crst_table_upgrade(mm);
if (rc)
return (unsigned long) rc;
- area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
}
- return area;
-}
-
-static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- unsigned long area;
- int rc;
- area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
- if (!(area & ~PAGE_MASK))
- return area;
- if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
- /* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm);
- if (rc)
- return (unsigned long) rc;
- area = arch_get_unmapped_area_topdown(filp, addr, len,
- pgoff, flags);
- }
- return area;
+ return addr;
}
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
if (mmap_is_legacy()) {
mm->mmap_base = mmap_base_legacy(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
#include <linux/gfp.h>
#include <linux/init.h>
-#define ESSA_SET_STABLE 1
-#define ESSA_SET_UNUSED 2
+#include <asm/page-states.h>
static int cmma_flag = 1;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..fc321c5ec30e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite(pte_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pte_val(new) |= _PAGE_NOEXEC;
else if (flags & SET_MEMORY_X)
pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite(pmd_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,7 +221,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
new = pud_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
- if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+ if (flags & SET_MEMORY_NX)
pud_val(new) |= _REGION_ENTRY_NOEXEC;
else if (flags & SET_MEMORY_X)
pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
@@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
int __set_memory(unsigned long addr, int numpages, unsigned long flags)
{
+ if (!MACHINE_HAS_NX)
+ flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+ if (!flags)
+ return 0;
addr &= PAGE_MASK;
return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..f502cbe657af 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm)
mm->context.asce_limit = 1UL << 53;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm->task_size = mm->context.asce_limit;
spin_unlock_bh(&mm->page_table_lock);
on_each_cpu(__crst_table_upgrade, mm, 0);
@@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm)
mm->context.asce_limit = 1UL << 31;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- mm->task_size = mm->context.asce_limit;
crst_table_free(mm, (unsigned long *) pgd);
if (current->active_mm == mm)
@@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
struct page *page;
unsigned long *table;
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ page = alloc_page(GFP_KERNEL);
if (page) {
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..947b66a5cdba 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
@@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ * or < 0 in case of error. -EINVAL is returned for invalid values
+ * of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste)
+{
+ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+ int res = 0;
+
+ WARN_ON_ONCE(orc > ESSA_MAX);
+ if (unlikely(orc > ESSA_MAX))
+ return -EINVAL;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ if (oldpte)
+ *oldpte = pte_val(*ptep);
+ if (oldpgste)
+ *oldpgste = pgstev;
+
+ switch (orc) {
+ case ESSA_GET_STATE:
+ break;
+ case ESSA_SET_STABLE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ break;
+ case ESSA_SET_UNUSED:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_POT_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+ break;
+ }
+ if (pgstev & _PGSTE_GPS_ZERO) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ break;
+ }
+ if (!(pgstev & PGSTE_GC_BIT)) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ res = 1;
+ break;
+ }
+ break;
+ case ESSA_SET_STABLE_RESIDENT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ /*
+ * Since the resident state can go away any time after this
+ * call, we will not make this page resident. We can revisit
+ * this decision if a guest will ever start using this.
+ */
+ break;
+ case ESSA_SET_STABLE_IF_RESIDENT:
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ }
+ break;
+ default:
+ /* we should never get here! */
+ break;
+ }
+ /* If we are discarding a page, set it to logical zero */
+ if (res)
+ pgstev |= _PGSTE_GPS_ZERO;
+
+ pgste_val(pgste) = pgstev;
+ pgste_set_unlock(ptep, pgste);
+ pte_unmap_unlock(ptep, ptl);
+ return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ * will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ unsigned long bits, unsigned long value)
+{
+ spinlock_t *ptl;
+ pgste_t new;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ new = pgste_get_lock(ptep);
+
+ pgste_val(new) &= ~bits;
+ pgste_val(new) |= value & bits;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ *pgstep = pgste_val(pgste_get(ptep));
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(get_pgste);
#endif
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 364b9d824be3..8051df109db3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -60,16 +60,8 @@ static DEFINE_SPINLOCK(zpci_domain_lock);
static struct airq_iv *zpci_aisb_iv;
static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
-/* Adapter interrupt definitions */
-static void zpci_irq_handler(struct airq_struct *airq);
-
-static struct airq_struct zpci_airq = {
- .handler = zpci_irq_handler,
- .isc = PCI_ISC,
-};
-
#define ZPCI_IOMAP_ENTRIES \
- min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \
+ min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
static DEFINE_SPINLOCK(zpci_iomap_lock);
@@ -214,8 +206,6 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
return rc;
}
-#define ZPCI_PCIAS_CFGSPC 15
-
static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
@@ -507,6 +497,11 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
}
}
+static struct airq_struct zpci_airq = {
+ .handler = zpci_irq_handler,
+ .isc = PCI_ISC,
+};
+
static int __init zpci_irq_init(void)
{
int rc;
@@ -871,11 +866,6 @@ int zpci_report_error(struct pci_dev *pdev,
}
EXPORT_SYMBOL(zpci_report_error);
-static inline int barsize(u8 size)
-{
- return (size) ? (1 << size) >> 10 : 0;
-}
-
static int zpci_mem_init(void)
{
BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 0cafe08919c9..b9918fb9587d 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -423,6 +423,20 @@ config HW_RANDOM_CAVIUM
If unsure, say Y.
+config HW_RANDOM_S390
+ tristate "S390 True Random Number Generator support"
+ depends on S390
+ default HW_RANDOM
+ ---help---
+ This driver provides kernel-side support for the True
+ Random Number Generator available as CPACF extension
+ on modern s390 hardware platforms.
+
+ To compile this driver as a module, choose M here: the
+ module will be called s390-trng.
+
+ If unsure, say Y.
+
endif # HW_RANDOM
config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 5f52b1e4e7be..dd1765246255 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
+obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
new file mode 100644
index 000000000000..aca48e893fca
--- /dev/null
+++ b/drivers/char/hw_random/s390-trng.c
@@ -0,0 +1,268 @@
+/*
+ * s390 TRNG device driver
+ *
+ * Driver for the TRNG (true random number generation) command
+ * available via CPACF extension MSA 7 on the s390 arch.
+
+ * Copyright IBM Corp. 2017
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ */
+
+#define KMSG_COMPONENT "trng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/miscdevice.h>
+#include <linux/debugfs.h>
+#include <linux/atomic.h>
+#include <linux/random.h>
+#include <linux/sched/signal.h>
+#include <asm/debug.h>
+#include <asm/cpacf.h>
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 CPACF TRNG device driver");
+
+
+/* trng related debug feature things */
+
+static debug_info_t *debug_info;
+
+#define DEBUG_DBG(...) debug_sprintf_event(debug_info, 6, ##__VA_ARGS__)
+#define DEBUG_INFO(...) debug_sprintf_event(debug_info, 5, ##__VA_ARGS__)
+#define DEBUG_WARN(...) debug_sprintf_event(debug_info, 4, ##__VA_ARGS__)
+#define DEBUG_ERR(...) debug_sprintf_event(debug_info, 3, ##__VA_ARGS__)
+
+
+/* trng helpers */
+
+static atomic64_t trng_dev_counter = ATOMIC64_INIT(0);
+static atomic64_t trng_hwrng_counter = ATOMIC64_INIT(0);
+
+
+/* file io functions */
+
+static int trng_open(struct inode *inode, struct file *file)
+{
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t trng_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ u8 buf[32];
+ u8 *p = buf;
+ unsigned int n;
+ ssize_t ret = 0;
+
+ /*
+ * use buf for requests <= sizeof(buf),
+ * otherwise allocate one page and fetch
+ * pagewise.
+ */
+
+ if (nbytes > sizeof(buf)) {
+ p = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ schedule();
+ }
+ n = nbytes > PAGE_SIZE ? PAGE_SIZE : nbytes;
+ cpacf_trng(NULL, 0, p, n);
+ atomic64_add(n, &trng_dev_counter);
+ if (copy_to_user(ubuf, p, n)) {
+ ret = -EFAULT;
+ break;
+ }
+ nbytes -= n;
+ ubuf += n;
+ ret += n;
+ }
+
+ if (p != buf)
+ free_page((unsigned long) p);
+
+ DEBUG_DBG("trng_read()=%zd\n", ret);
+ return ret;
+}
+
+
+/* sysfs */
+
+static ssize_t trng_counter_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u64 dev_counter = atomic64_read(&trng_dev_counter);
+ u64 hwrng_counter = atomic64_read(&trng_hwrng_counter);
+#if IS_ENABLED(CONFIG_ARCH_RANDOM)
+ u64 arch_counter = atomic64_read(&s390_arch_random_counter);
+
+ return snprintf(buf, PAGE_SIZE,
+ "trng: %llu\n"
+ "hwrng: %llu\n"
+ "arch: %llu\n"
+ "total: %llu\n",
+ dev_counter, hwrng_counter, arch_counter,
+ dev_counter + hwrng_counter + arch_counter);
+#else
+ return snprintf(buf, PAGE_SIZE,
+ "trng: %llu\n"
+ "hwrng: %llu\n"
+ "total: %llu\n",
+ dev_counter, hwrng_counter,
+ dev_counter + hwrng_counter);
+#endif
+}
+static DEVICE_ATTR(byte_counter, 0444, trng_counter_show, NULL);
+
+static struct attribute *trng_dev_attrs[] = {
+ &dev_attr_byte_counter.attr,
+ NULL
+};
+
+static const struct attribute_group trng_dev_attr_group = {
+ .attrs = trng_dev_attrs
+};
+
+static const struct attribute_group *trng_dev_attr_groups[] = {
+ &trng_dev_attr_group,
+ NULL
+};
+
+static const struct file_operations trng_fops = {
+ .owner = THIS_MODULE,
+ .open = &trng_open,
+ .release = NULL,
+ .read = &trng_read,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice trng_dev = {
+ .name = "trng",
+ .minor = MISC_DYNAMIC_MINOR,
+ .mode = 0444,
+ .fops = &trng_fops,
+ .groups = trng_dev_attr_groups,
+};
+
+
+/* hwrng_register */
+
+static inline void _trng_hwrng_read(u8 *buf, size_t len)
+{
+ cpacf_trng(NULL, 0, buf, len);
+ atomic64_add(len, &trng_hwrng_counter);
+}
+
+static int trng_hwrng_data_read(struct hwrng *rng, u32 *data)
+{
+ size_t len = sizeof(*data);
+
+ _trng_hwrng_read((u8 *) data, len);
+
+ DEBUG_DBG("trng_hwrng_data_read()=%zu\n", len);
+
+ return len;
+}
+
+static int trng_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+ size_t len = max <= PAGE_SIZE ? max : PAGE_SIZE;
+
+ _trng_hwrng_read((u8 *) data, len);
+
+ DEBUG_DBG("trng_hwrng_read()=%zu\n", len);
+
+ return len;
+}
+
+/*
+ * hwrng register struct
+ * The trng is supposed to have 100% entropy, and thus
+ * we register with a very high quality value.
+ */
+static struct hwrng trng_hwrng_dev = {
+ .name = "s390-trng",
+ .data_read = trng_hwrng_data_read,
+ .read = trng_hwrng_read,
+ .quality = 999,
+};
+
+
+/* init and exit */
+
+static void __init trng_debug_init(void)
+{
+ debug_info = debug_register("trng", 1, 1, 4 * sizeof(long));
+ debug_register_view(debug_info, &debug_sprintf_view);
+ debug_set_level(debug_info, 3);
+}
+
+static void trng_debug_exit(void)
+{
+ debug_unregister(debug_info);
+}
+
+static int __init trng_init(void)
+{
+ int ret;
+
+ trng_debug_init();
+
+ /* check if subfunction CPACF_PRNO_TRNG is available */
+ if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) {
+ DEBUG_INFO("trng_init CPACF_PRNO_TRNG not available\n");
+ ret = -ENODEV;
+ goto out_dbg;
+ }
+
+ ret = misc_register(&trng_dev);
+ if (ret) {
+ DEBUG_WARN("trng_init misc_register() failed rc=%d\n", ret);
+ goto out_dbg;
+ }
+
+ ret = hwrng_register(&trng_hwrng_dev);
+ if (ret) {
+ DEBUG_WARN("trng_init hwrng_register() failed rc=%d\n", ret);
+ goto out_misc;
+ }
+
+ DEBUG_DBG("trng_init successful\n");
+
+ return 0;
+
+out_misc:
+ misc_deregister(&trng_dev);
+out_dbg:
+ trng_debug_exit();
+ return ret;
+}
+
+static void __exit trng_exit(void)
+{
+ hwrng_unregister(&trng_hwrng_dev);
+ misc_deregister(&trng_dev);
+ trng_debug_exit();
+}
+
+module_cpu_feature_match(MSA, trng_init);
+module_exit(trng_exit);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 37e204f3d9be..6ee3a25ae731 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -327,6 +327,14 @@ config S390_IOMMU
help
Support for the IOMMU API for s390 PCI devices.
+config S390_CCW_IOMMU
+ bool "S390 CCW IOMMU Support"
+ depends on S390 && CCW
+ select IOMMU_API
+ help
+ Enables bits of IOMMU API required by VFIO. The iommu_ops
+ is not implemented as it is not necessary for VFIO.
+
config MTK_IOMMU
bool "MTK IOMMU Support"
depends on ARM || ARM64
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 774da20ceb58..107cd3361e29 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1052,8 +1052,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
} else {
/* fatal error - set status to FAILED
internal error 09 - Command Reject */
- dev_err(&device->cdev->dev, "An error occurred in the DASD "
- "device driver, reason=%s\n", "09");
+ if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags))
+ dev_err(&device->cdev->dev,
+ "An error occurred in the DASD device driver, reason=09\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 0b38217f8147..122456e4db89 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4927,10 +4927,14 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
dasd_eckd_dump_sense_tcw(device, req, irb);
} else {
/*
- * In some cases the 'No Record Found' error might be expected
- * and log messages shouldn't be written then. Check if the
- * according suppress bit is set.
+ * In some cases the 'Command Reject' or 'No Record Found'
+ * error might be expected and log messages shouldn't be
+ * written then. Check if the corresponding suppress bit is set.
*/
+ if (sense && sense[0] & SNS0_CMD_REJECT &&
+ test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
+ return;
+
if (sense && sense[1] & SNS1_NO_REC_FOUND &&
test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
return;
@@ -5172,6 +5176,10 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
if (!device->block && private->lcu->pav == HYPER_PAV)
return -EOPNOTSUPP;
+ /* may not be supported by the storage server */
+ if (!(private->features.feature[14] & 0x80))
+ return -EOPNOTSUPP;
+
cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
sizeof(struct dasd_psf_prssd_data) + 1,
device);
@@ -5219,6 +5227,8 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;
+ /* the command might not be supported, suppress error message */
+ __set_bit(DASD_CQR_SUPPRESS_CR, &cqr->flags);
rc = dasd_sleep_on_interruptible(cqr);
if (rc == 0) {
*data = *host_access;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 518dba2732d5..dca7cb1e6f65 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -239,11 +239,11 @@ struct dasd_ccw_req {
*/
/*
* The following flags are used to suppress output of certain errors.
- * These flags should only be used for format checks!
*/
#define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */
#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/
#define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
+#define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
/* Signature for error recovery functions. */
typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index 3ab9aedeb84a..bdf47526038a 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
obj-$(CONFIG_QDIO) += qdio.o
+
+vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
+obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 1b350665c823..89216174fcbb 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
return ccode;
}
}
+EXPORT_SYMBOL_GPL(cio_start_key);
int
cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
{
return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY);
}
+EXPORT_SYMBOL_GPL(cio_start);
/*
* resume suspended I/O operation
@@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_resume);
/*
* halt I/O operation
@@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_halt);
/*
* Clear I/O operation
@@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_clear);
/*
* Function: cio_cancel
@@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_cancel);
+/**
+ * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt
+ * and clear, in that order, if subchannel is valid.
+ * @sch: subchannel on which to perform the cancel_halt_clear operation
+ * @iretry: the number of retries remaining for the next operation
+ *
+ * This should be called repeatedly since halt/clear are asynchronous
+ * operations. We do one try with cio_cancel, three tries with cio_halt,
+ * 255 tries with cio_clear. The caller should initialize @iretry with
+ * the value 255 for its first call to this, and keep using the same
+ * @iretry in the subsequent calls until it gets a non -EBUSY return.
+ *
+ * Returns 0 if device now idle, -ENODEV for device not operational,
+ * -EBUSY if an interrupt is expected (either from halt/clear or from a
+ * status pending), and -EIO if out of retries.
+ */
+int cio_cancel_halt_clear(struct subchannel *sch, int *iretry)
+{
+ int ret;
+
+ if (cio_update_schib(sch))
+ return -ENODEV;
+ if (!sch->schib.pmcw.ena)
+ /* Not operational -> done. */
+ return 0;
+ /* Stage 1: cancel io. */
+ if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
+ !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+ if (!scsw_is_tm(&sch->schib.scsw)) {
+ ret = cio_cancel(sch);
+ if (ret != -EINVAL)
+ return ret;
+ }
+ /*
+ * Cancel io unsuccessful or not applicable (transport mode).
+ * Continue with asynchronous instructions.
+ */
+ *iretry = 3; /* 3 halt retries. */
+ }
+ /* Stage 2: halt io. */
+ if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
+ if (*iretry) {
+ *iretry -= 1;
+ ret = cio_halt(sch);
+ if (ret != -EBUSY)
+ return (ret == 0) ? -EBUSY : ret;
+ }
+ /* Halt io unsuccessful. */
+ *iretry = 255; /* 255 clear retries. */
+ }
+ /* Stage 3: clear io. */
+ if (*iretry) {
+ *iretry -= 1;
+ ret = cio_clear(sch);
+ return (ret == 0) ? -EBUSY : ret;
+ }
+ /* Function was unsuccessful */
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(cio_cancel_halt_clear);
static void cio_apply_config(struct subchannel *sch, struct schib *schib)
{
@@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch)
}
return ret;
}
+EXPORT_SYMBOL_GPL(cio_commit_config);
/**
* cio_update_schib - Perform stsch and update schib if subchannel is valid.
@@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key)
return cio_start_handle_notoper(sch, lpm);
}
}
+EXPORT_SYMBOL_GPL(cio_tm_start_key);
/**
* cio_tm_intrg - perform interrogate function
@@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch)
return -ENODEV;
}
}
+EXPORT_SYMBOL_GPL(cio_tm_intrg);
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index f0e57aefb5f2..939596d81b73 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32);
extern int cio_disable_subchannel (struct subchannel *);
extern int cio_cancel (struct subchannel *);
extern int cio_clear (struct subchannel *);
+extern int cio_cancel_halt_clear(struct subchannel *, int *);
extern int cio_resume (struct subchannel *);
extern int cio_halt (struct subchannel *);
extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9afb5ce13007..12016e32e519 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
add_timer(&cdev->private->timer);
}
-/*
- * Cancel running i/o. This is called repeatedly since halt/clear are
- * asynchronous operations. We do one try with cio_cancel, two tries
- * with cio_halt, 255 tries with cio_clear. If everythings fails panic.
- * Returns 0 if device now idle, -ENODEV for device not operational and
- * -EBUSY if an interrupt is expected (either from halt/clear or from a
- * status pending).
- */
int
ccw_device_cancel_halt_clear(struct ccw_device *cdev)
{
@@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
int ret;
sch = to_subchannel(cdev->dev.parent);
- if (cio_update_schib(sch))
- return -ENODEV;
- if (!sch->schib.pmcw.ena)
- /* Not operational -> done. */
- return 0;
- /* Stage 1: cancel io. */
- if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
- !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
- if (!scsw_is_tm(&sch->schib.scsw)) {
- ret = cio_cancel(sch);
- if (ret != -EINVAL)
- return ret;
- }
- /* cancel io unsuccessful or not applicable (transport mode).
- * Continue with asynchronous instructions. */
- cdev->private->iretry = 3; /* 3 halt retries. */
- }
- if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
- /* Stage 2: halt io. */
- if (cdev->private->iretry) {
- cdev->private->iretry--;
- ret = cio_halt(sch);
- if (ret != -EBUSY)
- return (ret == 0) ? -EBUSY : ret;
- }
- /* halt io unsuccessful. */
- cdev->private->iretry = 255; /* 255 clear retries. */
- }
- /* Stage 3: clear io. */
- if (cdev->private->iretry) {
- cdev->private->iretry--;
- ret = cio_clear (sch);
- return (ret == 0) ? -EBUSY : ret;
- }
- /* Function was unsuccessful */
- CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
- cdev->private->dev_id.ssid, cdev->private->dev_id.devno);
- return -EIO;
+ ret = cio_cancel_halt_clear(sch, &cdev->private->iretry);
+
+ if (ret == -EIO)
+ CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno);
+
+ return ret;
}
void ccw_device_update_sense_data(struct ccw_device *cdev)
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
new file mode 100644
index 000000000000..ba6ac83a6c25
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -0,0 +1,842 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/vfio.h>
+#include <asm/idals.h>
+
+#include "vfio_ccw_cp.h"
+
+/*
+ * Max length for ccw chain.
+ * XXX: Limit to 256, need to check more?
+ */
+#define CCWCHAIN_LEN_MAX 256
+
+/*
+ * A run of consecutive guest pages, described by a starting guest address
+ * and two parallel PFN arrays (see pfn_array_pin() for how they are filled).
+ */
+struct pfn_array {
+ /* Starting guest physical I/O address of the run. */
+ unsigned long pa_iova;
+ /* PFNs of the guest pages to pin (passed to vfio_pin_pages()). */
+ unsigned long *pa_iova_pfn;
+ /* PFNs of the pinned pages (filled in by vfio_pin_pages()). */
+ unsigned long *pa_pfn;
+ /* Number of pages described by the arrays above. */
+ int pa_nr;
+};
+
+/* A table of pfn_array entries: @pat_pa holds @pat_nr elements. */
+struct pfn_array_table {
+ struct pfn_array *pat_pa;
+ int pat_nr;
+};
+
+/*
+ * One kernel copy of a guest ccw chain. ccwchain_alloc() places the ccw
+ * array and the pfn_array_table array in the same allocation as this header.
+ */
+struct ccwchain {
+ /* Linkage into channel_program.ccwchain_list. */
+ struct list_head next;
+ /* Kernel copy of the ccws of this chain (ch_len entries). */
+ struct ccw1 *ch_ccw;
+ /* Guest physical address of the current chain. */
+ u64 ch_iova;
+ /* Count of the valid ccws in chain. */
+ int ch_len;
+ /* Pinned PAGEs for the original data. */
+ struct pfn_array_table *ch_pat;
+};
+
+/*
+ * pfn_array_pin() - pin a run of consecutive guest pages
+ * @pa: pfn_array describing the run
+ * @mdev: the mediated device used for the pin/unpin operations
+ *
+ * The caller provides:
+ * @pa->pa_iova     starting guest physical I/O address,
+ * @pa->pa_iova_pfn array that receives the guest PFNs to pin,
+ * @pa->pa_pfn      array that receives the PFNs of the pinned pages,
+ * @pa->pa_nr       number of pages to pin, starting at @pa->pa_iova.
+ *
+ * All pages are pinned or none: if vfio_pin_pages() pins only part of the
+ * run, the pinned pages are released again and @pa->pa_nr is reset to 0.
+ *
+ * Returns:
+ *   Number of pages pinned on success.
+ *   0 if @pa->pa_nr is 0 or negative, or if only part of the run could be
+ *   pinned (@pa->pa_nr is set to 0 in both cases).
+ *   Otherwise whatever vfio_pin_pages() returned (-errno).
+ */
+static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+{
+	unsigned long first_pfn;
+	int idx, pinned;
+
+	if (pa->pa_nr <= 0) {
+		pa->pa_nr = 0;
+		return 0;
+	}
+
+	/* The run is contiguous in guest space: consecutive PFNs. */
+	first_pfn = pa->pa_iova >> PAGE_SHIFT;
+	for (idx = 0; idx < pa->pa_nr; idx++)
+		pa->pa_iova_pfn[idx] = first_pfn + idx;
+
+	pinned = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
+				IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
+
+	/* Error, or the complete run was pinned: hand the result back. */
+	if (pinned <= 0 || pinned == pa->pa_nr)
+		return pinned;
+
+	/* Partial pin: undo it and report that nothing is pinned. */
+	vfio_unpin_pages(mdev, pa->pa_iova_pfn, pinned);
+	pa->pa_nr = 0;
+	return 0;
+}
+
+/*
+ * Unpin the pages before releasing the memory.
+ *
+ * Unpins @pa->pa_nr pages, resets @pa->pa_nr to 0 and frees
+ * @pa->pa_iova_pfn. @pa->pa_pfn is not freed separately because
+ * pfn_array_alloc_pin() places it inside the pa_iova_pfn allocation.
+ */
+static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
+{
+ vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+ pa->pa_nr = 0;
+ kfree(pa->pa_iova_pfn);
+}
+
+/*
+ * pfn_array_alloc_pin() - allocate the PFN arrays of @pa and pin the pages
+ * @pa: pfn_array to set up; @pa->pa_nr must be 0 on entry
+ * @mdev: the mediated device used for the pin operation
+ * @iova: starting guest physical address of the area
+ * @len: length of the area in bytes, must not be 0
+ *
+ * A single allocation backs both arrays: @pa->pa_iova_pfn is the first
+ * half, @pa->pa_pfn the second half.
+ *
+ * Returns the number of pinned pages (> 0) on success, -errno otherwise.
+ * On failure @pa holds no resources: pa_nr is 0 and both array pointers
+ * are NULL, so a later pfn_array_unpin_free() on it cannot unpin or free
+ * stale memory.
+ */
+static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
+			       u64 iova, unsigned int len)
+{
+	int ret = 0;
+
+	if (!len || pa->pa_nr)
+		return -EINVAL;
+
+	pa->pa_iova = iova;
+
+	/* Pages touched by [iova, iova + len), including a partial first one. */
+	pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (!pa->pa_nr)
+		return -EINVAL;
+
+	pa->pa_iova_pfn = kcalloc(pa->pa_nr,
+				  sizeof(*pa->pa_iova_pfn) +
+				  sizeof(*pa->pa_pfn),
+				  GFP_KERNEL);
+	if (unlikely(!pa->pa_iova_pfn)) {
+		pa->pa_nr = 0;
+		return -ENOMEM;
+	}
+	pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
+
+	ret = pfn_array_pin(pa, mdev);
+
+	if (ret > 0)
+		return ret;
+	else if (!ret)
+		ret = -EINVAL;
+
+	/* Leave @pa empty so that cleanup code does not touch freed memory. */
+	pa->pa_nr = 0;
+	kfree(pa->pa_iova_pfn);
+	pa->pa_iova_pfn = NULL;
+	pa->pa_pfn = NULL;
+
+	return ret;
+}
+
+/*
+ * Allocate a zeroed table of @nr pfn_array entries for @pat.
+ *
+ * Returns 0 on success. If the allocation yields NULL or the zero-size
+ * pointer (e.g. @nr is 0), @pat->pat_nr is set to 0 and -ENOMEM is returned.
+ */
+static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
+{
+	struct pfn_array *table = kcalloc(nr, sizeof(*table), GFP_KERNEL);
+	bool unusable = ZERO_OR_NULL_PTR(table);
+
+	pat->pat_pa = table;
+	pat->pat_nr = unusable ? 0 : nr;
+
+	return unusable ? -ENOMEM : 0;
+}
+
+/*
+ * Unpin and free every pfn_array of @pat, then release the table itself.
+ * A table with pat_nr == 0 is left untouched.
+ */
+static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
+				       struct device *mdev)
+{
+	int idx;
+
+	if (!pat->pat_nr)
+		return;
+
+	for (idx = 0; idx < pat->pat_nr; idx++)
+		pfn_array_unpin_free(&pat->pat_pa[idx], mdev);
+
+	kfree(pat->pat_pa);
+	pat->pat_pa = NULL;
+	pat->pat_nr = 0;
+}
+
+/*
+ * Check whether the page containing @iova is recorded in any pfn_array of
+ * @pat.
+ *
+ * Returns true if the PFN of @iova matches one of the pa_iova_pfn entries,
+ * false otherwise.
+ */
+static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
+					unsigned long iova)
+{
+	struct pfn_array *pa = pat->pat_pa;
+	unsigned long iova_pfn = iova >> PAGE_SHIFT;
+	int i, j;
+
+	for (i = 0; i < pat->pat_nr; i++, pa++)
+		for (j = 0; j < pa->pa_nr; j++)
+			/* j walks the pages of this pfn_array, i the table. */
+			if (pa->pa_iova_pfn[j] == iova_pfn)
+				return true;
+
+	return false;
+}
+/*
+ * Create the list of idal words for a pfn_array_table.
+ *
+ * One idaw is written per pinned page, in table order. @idaws must have
+ * room for the sum of all pa_nr values in @pat.
+ */
+static inline void pfn_array_table_idal_create_words(
+	struct pfn_array_table *pat,
+	unsigned long *idaws)
+{
+	unsigned long *idaw = idaws;
+	int tbl, page;
+
+	/*
+	 * Idal words (except the first one) rely on the memory being 4K
+	 * aligned. If a user virtual address is 4K aligned, then its
+	 * corresponding kernel physical address will also be 4K aligned.
+	 * Thus there is no problem in simply using the physical address to
+	 * create an idaw.
+	 */
+	for (tbl = 0; tbl < pat->pat_nr; tbl++) {
+		struct pfn_array *pa = &pat->pat_pa[tbl];
+
+		for (page = 0; page < pa->pa_nr; page++, idaw++) {
+			*idaw = pa->pa_pfn[page] << PAGE_SHIFT;
+			/* Only the very first idaw keeps the in-page offset. */
+			if (idaw == idaws)
+				*idaw += pa->pa_iova & (PAGE_SIZE - 1);
+		}
+	}
+}
+
+
+/*
+ * Within the domain (@mdev), copy @n bytes from a guest physical
+ * address (@iova) to a kernel buffer (@to).
+ *
+ * The guest pages are pinned for the duration of the copy and unpinned
+ * again before returning.
+ *
+ * Returns the number of bytes that were not copied (0 on success), or the
+ * error reported by pfn_array_alloc_pin().
+ */
+static long copy_from_iova(struct device *mdev,
+			   void *to, u64 iova,
+			   unsigned long n)
+{
+	struct pfn_array pa = {0};
+	unsigned long remaining = n;
+	int page, ret;
+
+	ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
+	if (ret <= 0)
+		return ret;
+
+	for (page = 0; page < pa.pa_nr && remaining; page++) {
+		u64 from = pa.pa_pfn[page] << PAGE_SHIFT;
+		unsigned long chunk = PAGE_SIZE;
+
+		/* The first page may be entered at an offset. */
+		if (page == 0) {
+			from += iova & (PAGE_SIZE - 1);
+			chunk -= iova & (PAGE_SIZE - 1);
+		}
+
+		chunk = min(remaining, chunk);
+		memcpy(to + (n - remaining), (void *)from, chunk);
+		remaining -= chunk;
+	}
+
+	pfn_array_unpin_free(&pa, mdev);
+
+	return remaining;
+}
+
+/*
+ * Copy @len ccws from guest address @iova into @to.
+ *
+ * If the orb of @cp does not have the format bit set, the copied data is
+ * interpreted as format-0 ccws and converted in place to format-1 layout.
+ * During that conversion, flags and count of a TIC are cleared and its
+ * command code is normalized to CCW_CMD_TIC.
+ *
+ * Returns 0 on success, otherwise the result of copy_from_iova().
+ */
+static long copy_ccw_from_iova(struct channel_program *cp,
+			       struct ccw1 *to, u64 iova,
+			       unsigned long len)
+{
+	struct ccw1 *cur, *end = to + len;
+	struct ccw0 fmt0;
+	int ret;
+
+	ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
+	if (ret)
+		return ret;
+
+	/* Already format-1: nothing to convert. */
+	if (cp->orb.cmd.fmt)
+		return ret;
+
+	for (cur = to; cur < end; cur++) {
+		bool tic;
+
+		/* Snapshot the raw format-0 view before overwriting it. */
+		fmt0 = *(struct ccw0 *)cur;
+		tic = (cur->cmd_code & 0x0f) == CCW_CMD_TIC;
+
+		cur->cmd_code = tic ? CCW_CMD_TIC : fmt0.cmd_code;
+		cur->flags = tic ? 0 : fmt0.flags;
+		cur->count = tic ? 0 : fmt0.count;
+		cur->cda = fmt0.cda;
+	}
+
+	return ret;
+}
+
+/*
+ * Helpers to operate ccwchain.
+ */
+/* True if the low four bits of the command code are all zero. */
+#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
+
+/* True if the command code is exactly CCW_CMD_NOOP. */
+#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
+
+/* True if the command code is exactly CCW_CMD_TIC. */
+#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
+
+/* Non-zero if the ccw has the CCW_FLAG_IDA flag set. */
+#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
+
+
+/* Non-zero if the ccw has CCW_FLAG_CC and/or CCW_FLAG_DC set. */
+#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
+
+/*
+ * Allocate a ccwchain with room for @len ccws and @len pfn_array_tables
+ * and append it to @cp->ccwchain_list.
+ *
+ * Layout of the single zeroed allocation:
+ *   [ struct ccwchain, padded to 8 bytes ][ len * ccw1 ][ len * pat ]
+ *
+ * Returns the new chain, or NULL if the allocation failed.
+ */
+static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
+{
+	/* Make ccw address aligned to 8. */
+	const size_t hdr_bytes = (sizeof(struct ccwchain) + 7L) & -8L;
+	const size_t ccw_bytes = sizeof(struct ccw1) * len;
+	const size_t pat_bytes = sizeof(struct pfn_array_table) * len;
+	struct ccwchain *chain;
+	u8 *base;
+
+	chain = kzalloc(hdr_bytes + ccw_bytes + pat_bytes,
+			GFP_DMA | GFP_KERNEL);
+	if (!chain)
+		return NULL;
+
+	base = (u8 *)chain;
+	chain->ch_ccw = (struct ccw1 *)(base + hdr_bytes);
+	chain->ch_pat = (struct pfn_array_table *)(base + hdr_bytes +
+						   ccw_bytes);
+	chain->ch_len = len;
+
+	list_add_tail(&chain->next, &cp->ccwchain_list);
+
+	return chain;
+}
+
+/*
+ * Unlink @chain from the list it is on and free it. The ccw and
+ * pfn_array_table arrays live in the same allocation (see ccwchain_alloc())
+ * and go away with it; resources referenced by them are not released here.
+ */
+static void ccwchain_free(struct ccwchain *chain)
+{
+ list_del(&chain->next);
+ kfree(chain);
+}
+
+/*
+ * Free resource for a ccw that allocated memory for its cda.
+ *
+ * Only ccws handled by ccwchain_fetch_direct()/ccwchain_fetch_idal() get a
+ * kernel-allocated idaw list in ->cda. Test and no-op ccws are left
+ * untouched by the translation and a TIC's ->cda is redirected into
+ * another ccwchain, so none of those may be passed to kfree() regardless
+ * of their count field.
+ */
+static void ccwchain_cda_free(struct ccwchain *chain, int idx)
+{
+	struct ccw1 *ccw = chain->ch_ccw + idx;
+
+	if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
+		return;
+
+	if (!ccw->count)
+		return;
+
+	kfree((void *)(u64)ccw->cda);
+}
+
+/*
+ * Unpin the pages then free the memory resources.
+ *
+ * For every ccw of every chain on @cp->ccwchain_list: release its
+ * pfn_array_table, then its cda allocation. Afterwards each chain is
+ * unlinked and freed; the _safe iterator is needed because ccwchain_free()
+ * removes the current entry from the list.
+ */
+static void cp_unpin_free(struct channel_program *cp)
+{
+ struct ccwchain *chain, *temp;
+ int i;
+
+ list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
+ for (i = 0; i < chain->ch_len; i++) {
+ pfn_array_table_unpin_free(chain->ch_pat + i,
+ cp->mdev);
+ ccwchain_cda_free(chain, i);
+ }
+ ccwchain_free(chain);
+ }
+}
+
+/**
+ * ccwchain_calc_length - calculate the length of the ccw chain.
+ * @iova: guest physical address of the target ccw chain
+ * @cp: channel_program on which to perform the operation
+ *
+ * This is the chain length not considering any TICs.
+ * You need to do a new round for each TIC target.
+ *
+ * The chain ends at the first ccw that neither has a chaining flag set nor
+ * is a TIC. A chain that does not end within CCWCHAIN_LEN_MAX ccws is
+ * rejected.
+ *
+ * Returns: the length of the ccw chain (1..CCWCHAIN_LEN_MAX), -EINVAL if
+ * the chain is too long, -ENOMEM if the scratch buffer cannot be allocated,
+ * or the result of copy_ccw_from_iova() if the copy fails.
+ */
+static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
+{
+	struct ccw1 *ccws;
+	int idx, ret;
+
+	/*
+	 * Copy current chain from guest to host kernel.
+	 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
+	 * So copying 2K is enough (safe).
+	 */
+	ccws = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccws), GFP_KERNEL);
+	if (!ccws)
+		return -ENOMEM;
+
+	ret = copy_ccw_from_iova(cp, ccws, iova, CCWCHAIN_LEN_MAX);
+	if (ret)
+		goto out_free;
+
+	/*
+	 * Never look beyond the CCWCHAIN_LEN_MAX entries that were copied:
+	 * if no terminating ccw is found among them, the chain is too long.
+	 */
+	ret = -EINVAL;
+	for (idx = 0; idx < CCWCHAIN_LEN_MAX; idx++) {
+		if (!ccw_is_chain(&ccws[idx]) && !ccw_is_tic(&ccws[idx])) {
+			ret = idx + 1;
+			break;
+		}
+	}
+
+out_free:
+	kfree(ccws);
+	return ret;
+}
+
+/*
+ * Check whether the target address of @tic lies within the guest address
+ * range of a chain that is already on @cp->ccwchain_list.
+ *
+ * Returns 1 if such a chain exists, 0 otherwise.
+ */
+static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
+{
+	struct ccwchain *cur;
+
+	list_for_each_entry(cur, &cp->ccwchain_list, next) {
+		/* Guest addresses of the first and the last ccw of the chain. */
+		u32 first = cur->ch_iova;
+		u32 last = first + (cur->ch_len - 1) * sizeof(struct ccw1);
+
+		if (tic->cda < first || tic->cda > last)
+			continue;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+static int ccwchain_loop_tic(struct ccwchain *chain,
+ struct channel_program *cp);
+
+/*
+ * Make sure the chain targeted by @tic has a kernel copy on
+ * @cp->ccwchain_list.
+ *
+ * If the target is not covered by an existing chain, a new chain is
+ * allocated, filled from guest memory and scanned for further TICs.
+ *
+ * Returns 0 on success or a negative error value.
+ */
+static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
+{
+	struct ccwchain *target;
+	int nr_ccws, rc;
+
+	/* May transfer to an existing chain. */
+	if (tic_target_chain_exists(tic, cp))
+		return 0;
+
+	/* Get chain length. */
+	nr_ccws = ccwchain_calc_length(tic->cda, cp);
+	if (nr_ccws < 0)
+		return nr_ccws;
+
+	/* Need alloc a new chain for this one. */
+	target = ccwchain_alloc(cp, nr_ccws);
+	if (!target)
+		return -ENOMEM;
+	target->ch_iova = tic->cda;
+
+	/* Copy the new chain from user. */
+	rc = copy_ccw_from_iova(cp, target->ch_ccw, tic->cda, nr_ccws);
+	if (!rc)
+		/* Loop for tics on this new chain. */
+		return ccwchain_loop_tic(target, cp);
+
+	ccwchain_free(target);
+	return rc;
+}
+
+/*
+ * Loop for TICs: run ccwchain_handle_tic() on every TIC ccw of @chain.
+ *
+ * Returns 0 if all TICs were handled, otherwise the first error.
+ */
+static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
+{
+	int idx, rc;
+
+	for (idx = 0; idx < chain->ch_len; idx++) {
+		struct ccw1 *cur = &chain->ch_ccw[idx];
+
+		if (ccw_is_tic(cur)) {
+			rc = ccwchain_handle_tic(cur, cp);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Translate the TIC at @idx of @chain: replace its guest target address by
+ * the address of the corresponding ccw in the kernel copy of the target
+ * chain.
+ *
+ * Returns 0 on success, -EFAULT if no chain on @cp->ccwchain_list covers
+ * the target address.
+ */
+static int ccwchain_fetch_tic(struct ccwchain *chain,
+			      int idx,
+			      struct channel_program *cp)
+{
+	struct ccw1 *ccw = chain->ch_ccw + idx;
+	struct ccwchain *iter;
+	u32 ccw_head, ccw_tail;
+
+	list_for_each_entry(iter, &cp->ccwchain_list, next) {
+		ccw_head = iter->ch_iova;
+		ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);
+
+		if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
+			/*
+			 * (ccw->cda - ccw_head) is a byte offset, so it has
+			 * to be added to a byte pointer; adding it to a
+			 * struct ccw1 pointer would scale it by the ccw size.
+			 */
+			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
+						     (ccw->cda - ccw_head));
+			return 0;
+		}
+	}
+
+	return -EFAULT;
+}
+
+/*
+ * Translate the direct-addressing ccw at @idx of @chain into an idal ccw:
+ * pin the guest data pages, build an idaw list from the pinned pages and
+ * point the ccw at that list.
+ *
+ * Returns 0 on success or a negative error value. On failure no pages stay
+ * pinned, the pfn_array_table of the ccw is empty again and ccw->cda is set
+ * to 0, so that cleanup code never passes a guest address to kfree().
+ */
+static int ccwchain_fetch_direct(struct ccwchain *chain,
+				 int idx,
+				 struct channel_program *cp)
+{
+	struct ccw1 *ccw;
+	struct pfn_array_table *pat;
+	unsigned long *idaws;
+	int idaw_nr, ret;
+
+	ccw = chain->ch_ccw + idx;
+
+	/*
+	 * Pin data page(s) in memory.
+	 * The number of pages actually is the count of the idaws which will be
+	 * needed when translating a direct ccw to a idal ccw.
+	 */
+	pat = chain->ch_pat + idx;
+	ret = pfn_array_table_init(pat, 1);
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_init;
+	}
+
+	idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
+				      ccw->cda, ccw->count);
+	if (idaw_nr < 0) {
+		/*
+		 * pfn_array_alloc_pin() holds no pages and no memory when it
+		 * fails, so only the table itself has to be dropped.
+		 */
+		kfree(pat->pat_pa);
+		pat->pat_pa = NULL;
+		pat->pat_nr = 0;
+		ret = idaw_nr;
+		goto out_init;
+	}
+
+	/* Translate this direct ccw to a idal ccw. */
+	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
+	if (!idaws) {
+		pfn_array_table_unpin_free(pat, cp->mdev);
+		ret = -ENOMEM;
+		goto out_init;
+	}
+	ccw->cda = (__u32) virt_to_phys(idaws);
+	ccw->flags |= CCW_FLAG_IDA;
+
+	pfn_array_table_idal_create_words(pat, idaws);
+
+	return 0;
+
+out_init:
+	ccw->cda = 0;
+	return ret;
+}
+
+/*
+ * Translate the idal ccw at @idx of @chain: copy the guest idaw list into
+ * a kernel buffer, pin the page behind every idaw and rewrite the list
+ * with the addresses of the pinned pages.
+ *
+ * ccw->cda is switched to the kernel idaw list only once everything has
+ * succeeded. On failure the idaw buffer is freed, all pages are unpinned
+ * and ccw->cda is set to 0, so cleanup code never sees a stale or guest
+ * address in it.
+ *
+ * Returns 0 on success or a negative error value.
+ */
+static int ccwchain_fetch_idal(struct ccwchain *chain,
+			       int idx,
+			       struct channel_program *cp)
+{
+	struct ccw1 *ccw;
+	struct pfn_array_table *pat;
+	unsigned long *idaws;
+	u64 idaw_iova;
+	unsigned int idaw_nr, idaw_len;
+	int i, ret;
+
+	ccw = chain->ch_ccw + idx;
+
+	/* Calculate size of idaws. */
+	ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
+	if (ret)
+		goto out_init;
+	idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
+	idaw_len = idaw_nr * sizeof(*idaws);
+
+	/* Pin data page(s) in memory. */
+	pat = chain->ch_pat + idx;
+	ret = pfn_array_table_init(pat, idaw_nr);
+	if (ret)
+		goto out_init;
+
+	/* Translate idal ccw to use new allocated idaws. */
+	idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
+	if (!idaws) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+
+	ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
+	if (ret)
+		goto out_free_idaws;
+
+	for (i = 0; i < idaw_nr; i++) {
+		idaw_iova = *(idaws + i);
+		if (IS_ERR_VALUE(idaw_iova)) {
+			ret = -EFAULT;
+			goto out_free_idaws;
+		}
+
+		ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
+					  idaw_iova, 1);
+		if (ret < 0) {
+			/*
+			 * The failed entry holds no pages and no memory;
+			 * clear it so the table cleanup below skips it.
+			 */
+			memset(pat->pat_pa + i, 0, sizeof(*pat->pat_pa));
+			goto out_free_idaws;
+		}
+	}
+
+	ccw->cda = virt_to_phys(idaws);
+
+	pfn_array_table_idal_create_words(pat, idaws);
+
+	return 0;
+
+out_free_idaws:
+	kfree(idaws);
+out_unpin:
+	pfn_array_table_unpin_free(pat, cp->mdev);
+out_init:
+	ccw->cda = 0;
+	return ret;
+}
+
+/*
+ * Fetch one ccw.
+ * To reduce memory copy, we'll pin the cda page in memory,
+ * and to get rid of the cda 2G limitation of ccw1, we'll translate
+ * direct ccws to idal ccws.
+ *
+ * Test and no-op ccws need no translation. Everything else is dispatched
+ * to the TIC, idal or direct handler.
+ */
+static int ccwchain_fetch_one(struct ccwchain *chain,
+			      int idx,
+			      struct channel_program *cp)
+{
+	struct ccw1 *ccw = &chain->ch_ccw[idx];
+
+	if (ccw_is_test(ccw))
+		return 0;
+	if (ccw_is_noop(ccw))
+		return 0;
+
+	if (ccw_is_tic(ccw))
+		return ccwchain_fetch_tic(chain, idx, cp);
+
+	return ccw_is_idal(ccw) ? ccwchain_fetch_idal(chain, idx, cp)
+				: ccwchain_fetch_direct(chain, idx, cp);
+}
+
+/**
+ * cp_init() - allocate ccwchains for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @mdev: the mediated device to perform pin/unpin operations
+ * @orb: control block for the channel program from the guest
+ *
+ * This creates one or more ccwchain(s), and copies the raw data of
+ * the target channel program from @orb->cmd.cpa to the new ccwchain(s).
+ * No translation is done and no data pages are pinned here; that is the
+ * job of cp_prefetch().
+ *
+ * Limitations:
+ * 1. Supports only prefetch enabled mode.
+ * 2. Supports idal(c64) ccw chaining.
+ * 3. Supports 4k idaw.
+ *
+ * Returns:
+ *   %0 on success and a negative error value on failure. On failure all
+ *   ccwchains allocated so far have been released again.
+ */
+int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
+{
+	u64 iova = orb->cmd.cpa;
+	struct ccwchain *chain, *temp;
+	int len, ret;
+
+	/*
+	 * XXX:
+	 * Only support prefetch enable mode now.
+	 * Only support 64bit addressing idal.
+	 * Only support 4k IDAW.
+	 */
+	if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
+		return -EOPNOTSUPP;
+
+	INIT_LIST_HEAD(&cp->ccwchain_list);
+	memcpy(&cp->orb, orb, sizeof(*orb));
+	cp->mdev = mdev;
+
+	/* Get chain length. */
+	len = ccwchain_calc_length(iova, cp);
+	if (len < 0)
+		return len;
+
+	/* Alloc mem for the head chain. */
+	chain = ccwchain_alloc(cp, len);
+	if (!chain)
+		return -ENOMEM;
+	chain->ch_iova = iova;
+
+	/* Copy the head chain from guest. */
+	ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
+	if (ret) {
+		ccwchain_free(chain);
+		return ret;
+	}
+
+	/* Now loop for its TICs. */
+	ret = ccwchain_loop_tic(chain, cp);
+	if (ret) {
+		/*
+		 * The chains still hold raw guest ccws: nothing is pinned
+		 * and every cda is a guest address. Free only the chains
+		 * themselves; the cda values must not reach kfree().
+		 */
+		list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next)
+			ccwchain_free(chain);
+	}
+
+	return ret;
+}
+
+
+/**
+ * cp_free() - free resources for channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This unpins the memory pages and frees the memory space occupied by
+ * @cp, which must have been returned by a previous call to cp_init().
+ * Otherwise, undefined behavior occurs.
+ *
+ * This is the public wrapper around cp_unpin_free(); @cp itself is not
+ * freed, only the ccwchains on its list.
+ */
+void cp_free(struct channel_program *cp)
+{
+ cp_unpin_free(cp);
+}
+
+/**
+ * cp_prefetch() - translate a guest physical address channel program to
+ *                 a real-device runnable channel program.
+ * @cp: channel_program on which to perform the operation
+ *
+ * This function translates the guest-physical-address channel program
+ * and stores the result to ccwchain list. @cp must have been
+ * initialized by a previous call with cp_init(). Otherwise, undefined
+ * behavior occurs.
+ *
+ * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
+ * as helpers to do ccw chain translation inside the kernel. Basically
+ * they accept a channel program issued by a virtual machine, and
+ * translate the channel program to a real-device runnable channel
+ * program.
+ *
+ * These APIs will copy the ccws into kernel-space buffers, and update
+ * the guest physical addresses with their corresponding host physical
+ * addresses. Then channel I/O device drivers could issue the
+ * translated channel program to real devices to perform an I/O
+ * operation.
+ *
+ * These interfaces are designed to support translation only for
+ * channel programs, which are generated and formatted by a
+ * guest. Thus this will make it possible for things like VFIO to
+ * leverage the interfaces to passthrough a channel I/O mediated
+ * device in QEMU.
+ *
+ * We support direct ccw chaining by translating them to idal ccws.
+ *
+ * If the translation of a ccw fails, the chain lengths are shrunk to the
+ * ccws that were translated successfully, so that a following cp_free()
+ * only releases resources that were really set up.
+ *
+ * Returns:
+ *   %0 on success and a negative error value on failure.
+ */
+int cp_prefetch(struct channel_program *cp)
+{
+	struct ccwchain *chain;
+	int len, idx, ret;
+
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		len = chain->ch_len;
+		for (idx = 0; idx < len; idx++) {
+			ret = ccwchain_fetch_one(chain, idx, cp);
+			if (ret)
+				goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	/* Only cleanup the chain elements that were actually translated. */
+	chain->ch_len = idx;
+	list_for_each_entry_continue(chain, &cp->ccwchain_list, next)
+		chain->ch_len = 0;
+
+	return ret;
+}
+
+/**
+ * cp_get_orb() - get the orb of the channel program
+ * @cp: channel_program on which to perform the operation
+ * @intparm: new intparm for the returned orb
+ * @lpm: candidate value of the logical-path mask for the returned orb
+ *
+ * The orb stored in @cp is updated in place: @intparm is set, the format
+ * bit is forced to 1, the key is set to the default page key, @lpm is used
+ * only if the orb has no logical-path mask of its own, and the channel
+ * program address is pointed at the first ccwchain of @cp.
+ *
+ * Returns the address of the updated orb. Channel I/O device drivers could
+ * use this orb to issue a ssch.
+ */
+union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
+{
+	union orb *orb = &cp->orb;
+	struct ccwchain *head;
+
+	head = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
+
+	orb->cmd.intparm = intparm;
+	orb->cmd.fmt = 1;
+	orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
+
+	/* Keep a logical-path mask that is already set in the orb. */
+	if (!orb->cmd.lpm)
+		orb->cmd.lpm = lpm;
+
+	orb->cmd.cpa = (__u32) __pa(head->ch_ccw);
+
+	return orb;
+}
+
+/**
+ * cp_update_scsw() - update scsw for a channel program.
+ * @cp: channel_program on which to perform the operation
+ * @scsw: I/O results of the channel program and also the target to be
+ *        updated
+ *
+ * @scsw contains the I/O results of the channel program pointed to by
+ * @cp. However, what @scsw->cpa stores is a host physical address, which
+ * is meaningless for the guest that is waiting for the I/O results.
+ *
+ * This function updates @scsw->cpa to its corresponding guest physical
+ * address. If the address does not fall into any ccwchain of @cp, it is
+ * left unchanged.
+ */
+void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
+{
+	struct ccwchain *cur;
+	u32 cpa = scsw->cmd.cpa;
+
+	/*
+	 * LATER:
+	 * For now, only update the cmd.cpa part. We may need to deal with
+	 * other portions of the schib as well, even if we don't return them
+	 * in the ioctl directly. Path status changes etc.
+	 */
+	list_for_each_entry(cur, &cp->ccwchain_list, next) {
+		u32 first = (u32)(u64)cur->ch_ccw;
+		u32 last = (u32)(u64)(cur->ch_ccw + cur->ch_len - 1);
+
+		if (cpa < first || cpa > last)
+			continue;
+
+		/*
+		 * (cpa - first) is the offset of the host ccw from the head
+		 * of its chain. Adding it to the guest physical address of
+		 * the chain head yields the guest cpa.
+		 */
+		cpa = cur->ch_iova + (cpa - first);
+		break;
+	}
+
+	scsw->cmd.cpa = cpa;
+}
+
+/**
+ * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
+ * @cp: channel_program on which to perform the operation
+ * @iova: the iova to check
+ *
+ * Every pfn_array_table of every ccwchain of @cp is searched.
+ *
+ * If the @iova is currently pinned for the ccw chain, return true;
+ * else return false.
+ */
+bool cp_iova_pinned(struct channel_program *cp, u64 iova)
+{
+	struct ccwchain *chain;
+
+	list_for_each_entry(chain, &cp->ccwchain_list, next) {
+		struct pfn_array_table *pat = chain->ch_pat;
+		int left;
+
+		for (left = chain->ch_len; left > 0; left--, pat++) {
+			if (pfn_array_table_iova_pinned(pat, iova))
+				return true;
+		}
+	}
+
+	return false;
+}
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
new file mode 100644
index 000000000000..7a1996b3b36d
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -0,0 +1,42 @@
+/*
+ * channel program interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_CP_H_
+#define _VFIO_CCW_CP_H_
+
+#include <asm/cio.h>
+#include <asm/scsw.h>
+
+#include "orb.h"
+
+/**
+ * struct channel_program - manage information for channel program
+ * @ccwchain_list: list head of ccwchains
+ * @orb: orb for the currently processed ssch request
+ * @mdev: the mediated device to perform page pinning/unpinning
+ *
+ * @ccwchain_list is the head of a ccwchain list that contains the
+ * translated result of the guest channel program pointed to by the
+ * orb that was passed to cp_init().
+ */
+struct channel_program {
+ struct list_head ccwchain_list;
+ union orb orb;
+ struct device *mdev;
+};
+
+extern int cp_init(struct channel_program *cp, struct device *mdev,
+ union orb *orb);
+extern void cp_free(struct channel_program *cp);
+extern int cp_prefetch(struct channel_program *cp);
+extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
+extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw);
+extern bool cp_iova_pinned(struct channel_program *cp, u64 iova);
+
+#endif
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
new file mode 100644
index 000000000000..e90dd43d2a55
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -0,0 +1,308 @@
+/*
+ * VFIO based Physical Subchannel device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/mdev.h>
+
+#include <asm/isc.h>
+
+#include "ioasm.h"
+#include "css.h"
+#include "vfio_ccw_private.h"
+
+struct workqueue_struct *vfio_ccw_work_q;
+
+/*
+ * Helpers
+ */
+/*
+ * Quiesce @sch: disable the subchannel, terminating outstanding I/O with
+ * cancel/halt/clear where needed.
+ *
+ * While cio_cancel_halt_clear() reports -EBUSY, the function waits (up to
+ * 3 seconds per round) for the completion signalled through
+ * private->completion and flushes the vfio-ccw workqueue before retrying.
+ * Both the wait and the flush can sleep, so sch->lock is dropped around
+ * them. The state is set to VFIO_CCW_STATE_NOT_OPER on every path.
+ *
+ * Returns the result of the last cio_disable_subchannel() call, or 0 if the
+ * subchannel was not enabled in the first place.
+ */
+int vfio_ccw_sch_quiesce(struct subchannel *sch)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	DECLARE_COMPLETION_ONSTACK(completion);
+	int iretry, ret = 0;
+
+	spin_lock_irq(sch->lock);
+	if (!sch->schib.pmcw.ena)
+		goto out_unlock;
+	ret = cio_disable_subchannel(sch);
+	if (ret != -EBUSY)
+		goto out_unlock;
+
+	do {
+		iretry = 255;
+
+		ret = cio_cancel_halt_clear(sch, &iretry);
+		while (ret == -EBUSY) {
+			/*
+			 * Flush all I/O and wait for
+			 * cancel/halt/clear completion.
+			 */
+			private->completion = &completion;
+			spin_unlock_irq(sch->lock);
+
+			wait_for_completion_timeout(&completion, 3*HZ);
+
+			spin_lock_irq(sch->lock);
+			private->completion = NULL;
+			spin_unlock_irq(sch->lock);
+
+			/* May sleep: must not be called with sch->lock held. */
+			flush_workqueue(vfio_ccw_work_q);
+
+			spin_lock_irq(sch->lock);
+			ret = cio_cancel_halt_clear(sch, &iretry);
+		}
+
+		ret = cio_disable_subchannel(sch);
+	} while (ret == -EBUSY);
+out_unlock:
+	private->state = VFIO_CCW_STATE_NOT_OPER;
+	spin_unlock_irq(sch->lock);
+	return ret;
+}
+
+/*
+ * Work handler for a stored interrupt response block.
+ *
+ * For a solicited interrupt the cpa in the scsw is translated back to a
+ * guest address and the channel program is freed. The irb is then copied
+ * into the I/O region, the I/O eventfd (if one is set) is signalled, and
+ * the state goes back to VFIO_CCW_STATE_IDLE if an mdev is attached.
+ */
+static void vfio_ccw_sch_io_todo(struct work_struct *work)
+{
+	struct vfio_ccw_private *private;
+	struct irb *irb;
+
+	private = container_of(work, struct vfio_ccw_private, io_work);
+	irb = &private->irb;
+
+	if (scsw_is_solicited(&irb->scsw)) {
+		cp_update_scsw(&private->cp, &irb->scsw);
+		cp_free(&private->cp);
+	}
+	memcpy(private->io_region.irb_area, irb, sizeof(*irb));
+
+	if (private->io_trigger)
+		eventfd_signal(private->io_trigger, 1);
+
+	if (private->mdev)
+		private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Sysfs interfaces
+ */
+/*
+ * sysfs show callback for "chpids": prints eight two-digit hex values, one
+ * per bit of ssd->path_mask starting with 0x80. A path whose bit is not
+ * set is printed as "00". Returns the number of bytes written to @buf.
+ */
+static ssize_t chpids_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct subchannel *sch = to_subchannel(dev);
+	struct chsc_ssd_info *ssd = &sch->ssd_info;
+	ssize_t len = 0;
+	int idx;
+
+	for (idx = 0; idx < 8; idx++) {
+		if (!(ssd->path_mask & (0x80 >> idx))) {
+			len += sprintf(buf + len, "00 ");
+			continue;
+		}
+		len += sprintf(buf + len, "%02x ", ssd->chpid[idx].id);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs show callback for "pimpampom": prints the pim, pam and pom fields
+ * of the subchannel's pmcw as three two-digit hex values on one line.
+ */
+static ssize_t pimpampom_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ struct pmcw *pmcw = &sch->schib.pmcw;
+
+ return sprintf(buf, "%02x %02x %02x\n",
+ pmcw->pim, pmcw->pam, pmcw->pom);
+}
+
+/* Read-only (0444) attributes; no store callbacks are provided. */
+static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
+static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
+
+/* NULL-terminated attribute list for the group below. */
+static struct attribute *vfio_subchannel_attrs[] = {
+ &dev_attr_chpids.attr,
+ &dev_attr_pimpampom.attr,
+ NULL,
+};
+
+/* Created on the subchannel device in probe, removed in remove. */
+static struct attribute_group vfio_subchannel_attr_group = {
+ .attrs = vfio_subchannel_attrs,
+};
+
+/*
+ * Css driver callbacks
+ */
+/*
+ * Interrupt callback of the css driver: accounts the interrupt under
+ * IRQIO_CIO and feeds VFIO_CCW_EVENT_INTERRUPT into the state machine.
+ */
+static void vfio_ccw_sch_irq(struct subchannel *sch)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+ inc_irq_stat(IRQIO_CIO);
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
+}
+
+/*
+ * Probe callback of the css driver.
+ *
+ * Rejects subchannels with pmcw->qf set (-ENODEV). Otherwise allocates the
+ * private data, enables the subchannel on VFIO_CCW_ISC, creates the sysfs
+ * attribute group and registers the mdev parent. On success the device is
+ * left in VFIO_CCW_STATE_STANDBY.
+ *
+ * Returns 0 on success or a negative error value; on failure everything
+ * set up so far is undone in reverse order.
+ */
+static int vfio_ccw_sch_probe(struct subchannel *sch)
+{
+ struct pmcw *pmcw = &sch->schib.pmcw;
+ struct vfio_ccw_private *private;
+ int ret;
+
+ if (pmcw->qf) {
+ dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
+ dev_name(&sch->dev));
+ return -ENODEV;
+ }
+
+ /* NOTE(review): GFP_DMA presumably because parts of it are used for I/O; confirm. */
+ private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+ if (!private)
+ return -ENOMEM;
+ private->sch = sch;
+ dev_set_drvdata(&sch->dev, private);
+
+ spin_lock_irq(sch->lock);
+ private->state = VFIO_CCW_STATE_NOT_OPER;
+ sch->isc = VFIO_CCW_ISC;
+ /* The subchannel address (truncated to 32 bit) is used as intparm. */
+ ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+ spin_unlock_irq(sch->lock);
+ if (ret)
+ goto out_free;
+
+ ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+ if (ret)
+ goto out_disable;
+
+ ret = vfio_ccw_mdev_reg(sch);
+ if (ret)
+ goto out_rm_group;
+
+ INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+ atomic_set(&private->avail, 1);
+ private->state = VFIO_CCW_STATE_STANDBY;
+
+ return 0;
+
+out_rm_group:
+ sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+out_disable:
+ cio_disable_subchannel(sch);
+out_free:
+ dev_set_drvdata(&sch->dev, NULL);
+ kfree(private);
+ return ret;
+}
+
+/*
+ * Remove callback of the css driver: quiesces the subchannel, unregisters
+ * the mdev parent, removes the sysfs group and frees the private data.
+ * The result of vfio_ccw_sch_quiesce() is ignored; always returns 0.
+ */
+static int vfio_ccw_sch_remove(struct subchannel *sch)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+
+ vfio_ccw_sch_quiesce(sch);
+
+ vfio_ccw_mdev_unreg(sch);
+
+ sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+
+ dev_set_drvdata(&sch->dev, NULL);
+
+ kfree(private);
+
+ return 0;
+}
+
+/* Shutdown callback of the css driver: only quiesces the subchannel. */
+static void vfio_ccw_sch_shutdown(struct subchannel *sch)
+{
+ vfio_ccw_sch_quiesce(sch);
+}
+
+/**
+ * vfio_ccw_sch_event - process subchannel event
+ * @sch: subchannel
+ * @process: non-zero if function is called in process context
+ *
+ * An unspecified event occurred for this subchannel. Adjust data according
+ * to the current operational state of the subchannel. Return zero when the
+ * event has been handled sufficiently or -EAGAIN when this function should
+ * be called again in process context.
+ *
+ * NOTE(review): as written, every path returns 0 and @process is not used;
+ * -EAGAIN is never returned, not even when the device is unregistered or
+ * todo work is pending. Confirm whether that is intended.
+ */
+static int vfio_ccw_sch_event(struct subchannel *sch, int process)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(sch->lock, flags);
+ if (!device_is_registered(&sch->dev))
+ goto out_unlock;
+
+ if (work_pending(&sch->todo_work))
+ goto out_unlock;
+
+ /* A failing schib update is reported to the FSM as "not operational". */
+ if (cio_update_schib(sch)) {
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+ goto out_unlock;
+ }
+
+ /* Re-reads the drvdata that was already fetched at the top. */
+ private = dev_get_drvdata(&sch->dev);
+ /* Subchannel is usable again: leave NOT_OPER, IDLE if an mdev exists. */
+ if (private->state == VFIO_CCW_STATE_NOT_OPER) {
+ private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
+ VFIO_CCW_STATE_STANDBY;
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(sch->lock, flags);
+
+ return 0;
+}
+
+/* Device id table: a single entry for SUBCHANNEL_TYPE_IO subchannels. */
+static struct css_device_id vfio_ccw_sch_ids[] = {
+ { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
+ { /* end of list */ },
+};
+MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
+
+/* css driver definition wiring up the callbacks defined in this file. */
+static struct css_driver vfio_ccw_sch_driver = {
+ .drv = {
+ .name = "vfio_ccw",
+ .owner = THIS_MODULE,
+ },
+ .subchannel_type = vfio_ccw_sch_ids,
+ .irq = vfio_ccw_sch_irq,
+ .probe = vfio_ccw_sch_probe,
+ .remove = vfio_ccw_sch_remove,
+ .shutdown = vfio_ccw_sch_shutdown,
+ .sch_event = vfio_ccw_sch_event,
+};
+
+/*
+ * Module init: creates the single-threaded "vfio-ccw" workqueue, registers
+ * VFIO_CCW_ISC and then the css driver. If driver registration fails, the
+ * isc registration and the workqueue are undone.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be created, or the
+ * error from css_driver_register().
+ */
+static int __init vfio_ccw_sch_init(void)
+{
+ int ret;
+
+ vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
+ if (!vfio_ccw_work_q)
+ return -ENOMEM;
+
+ isc_register(VFIO_CCW_ISC);
+ ret = css_driver_register(&vfio_ccw_sch_driver);
+ if (ret) {
+ isc_unregister(VFIO_CCW_ISC);
+ destroy_workqueue(vfio_ccw_work_q);
+ }
+
+ return ret;
+}
+
+/*
+ * Module exit: tears down in reverse order of vfio_ccw_sch_init(): the css
+ * driver first, then the isc registration, finally the workqueue.
+ */
+static void __exit vfio_ccw_sch_exit(void)
+{
+ css_driver_unregister(&vfio_ccw_sch_driver);
+ isc_unregister(VFIO_CCW_ISC);
+ destroy_workqueue(vfio_ccw_work_q);
+}
+module_init(vfio_ccw_sch_init);
+module_exit(vfio_ccw_sch_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
new file mode 100644
index 000000000000..80a0559cd7ce
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -0,0 +1,203 @@
+/*
+ * Finite state machine for vfio-ccw device handling
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "ioasm.h"
+#include "vfio_ccw_private.h"
+
+/*
+ * Mark the device busy and issue "Start Subchannel" for the current
+ * channel program.
+ *
+ * Returns 0 when the start was accepted, -EBUSY for condition codes 1
+ * (status pending) and 2 (busy), -EACCES or -ENODEV for condition code 3
+ * (device/path not operational), and the raw condition code otherwise.
+ */
+static int fsm_io_helper(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch = private->sch;
+	unsigned long irq_flags;
+	union orb *orb;
+	__u8 used_lpm;
+	int cc;
+
+	spin_lock_irqsave(sch->lock, irq_flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+	spin_unlock_irqrestore(sch->lock, irq_flags);
+
+	orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
+
+	/* Issue "Start Subchannel" */
+	cc = ssch(sch->schid, orb);
+
+	if (cc == 0) {
+		/* Initialize device status information */
+		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
+		return 0;
+	}
+
+	/* Status pending (1) or busy (2) */
+	if (cc == 1 || cc == 2)
+		return -EBUSY;
+
+	if (cc != 3)
+		return cc;
+
+	/* Device/path not operational: drop the paths used by this orb. */
+	used_lpm = orb->cmd.lpm;
+	if (used_lpm != 0)
+		sch->lpm &= ~used_lpm;
+	else
+		sch->lpm = 0;
+
+	if (cio_update_schib(sch))
+		return -ENODEV;
+
+	/* Paths left in the mask: -EACCES; none left: -ENODEV. */
+	return sch->lpm ? -EACCES : -ENODEV;
+}
+
+/*
+ * The device became not operational: schedule SCH_TODO_UNREG for the
+ * subchannel and move the state machine to NOT_OPER.
+ */
+static void fsm_notoper(struct vfio_ccw_private *private,
+			enum vfio_ccw_event event)
+{
+	/*
+	 * TODO:
+	 * Probably we should send the machine check to the guest.
+	 */
+	css_sched_sch_todo(private->sch, SCH_TODO_UNREG);
+	private->state = VFIO_CCW_STATE_NOT_OPER;
+}
+
+/*
+ * No operation action: deliberately ignores the event and leaves the
+ * device state untouched.
+ */
+static void fsm_nop(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+}
+
+/*
+ * Reject an I/O request: log the current state and report -EIO to the
+ * requester through io_region.ret_code.
+ */
+static void fsm_io_error(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
+ private->io_region.ret_code = -EIO;
+}
+
+/*
+ * Reject an I/O request by reporting -EBUSY through io_region.ret_code
+ * (used by the jumptable for the BOXED and BUSY states).
+ */
+static void fsm_io_busy(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ private->io_region.ret_code = -EBUSY;
+}
+
+/*
+ * Interrupt handler for a state in which the subchannel should be disabled.
+ */
+static void fsm_disabled_irq(struct vfio_ccw_private *private,
+			     enum vfio_ccw_event event)
+{
+	/*
+	 * An interrupt in a disabled state means a previous disable was not
+	 * successful - should not happen, but we try to disable again.
+	 */
+	cio_disable_subchannel(private->sch);
+}
+
+/*
+ * Deal with the ccw command request from the userspace.
+ *
+ * The state is BOXED while the request is processed. On a successful
+ * start the function returns with the state set by fsm_io_helper(); on
+ * every other path the state falls back to IDLE and the error, if any,
+ * is reported through io_region.ret_code.
+ */
+static void fsm_io_request(struct vfio_ccw_private *private,
+			   enum vfio_ccw_event event)
+{
+	struct ccw_io_region *region = &private->io_region;
+	struct mdev_device *mdev = private->mdev;
+	union scsw *scsw = &private->scsw;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	memcpy(scsw, region->scsw_area, sizeof(*scsw));
+
+	if (!(scsw->cmd.fctl & SCSW_FCTL_START_FUNC)) {
+		/* XXX: Handle halt and clear. */
+		if (scsw->cmd.fctl &
+		    (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC))
+			region->ret_code = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+	region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
+				   (union orb *)region->orb_area);
+	if (region->ret_code)
+		goto err_out;
+
+	region->ret_code = cp_prefetch(&private->cp);
+	if (region->ret_code)
+		goto err_free;
+
+	/* Start channel program and wait for I/O interrupt. */
+	region->ret_code = fsm_io_helper(private);
+	if (region->ret_code)
+		goto err_free;
+
+	return;
+
+err_free:
+	cp_free(&private->cp);
+err_out:
+	private->state = VFIO_CCW_STATE_IDLE;
+}
+
+/*
+ * Got an interrupt for a normal io (state busy).
+ *
+ * Saves this CPU's irb into the private data, queues the deferred I/O
+ * work and wakes up a waiter if a completion is installed.
+ */
+static void fsm_irq(struct vfio_ccw_private *private,
+		    enum vfio_ccw_event event)
+{
+	struct irb *cpu_irb = this_cpu_ptr(&cio_irb);
+
+	memcpy(&private->irb, cpu_irb, sizeof(private->irb));
+
+	queue_work(vfio_ccw_work_q, &private->io_work);
+
+	if (private->completion)
+		complete(private->completion);
+}
+
+/*
+ * Device statemachine
+ *
+ * Indexed by [current state][event]; vfio_ccw_fsm_event() calls the
+ * selected handler. Every state provides a handler for every event.
+ */
+fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
+ [VFIO_CCW_STATE_NOT_OPER] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
+ },
+ [VFIO_CCW_STATE_STANDBY] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_IDLE] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_BOXED] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+ [VFIO_CCW_STATE_BUSY] = {
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ },
+};
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
new file mode 100644
index 000000000000..e72abbc18ee3
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -0,0 +1,425 @@
+/*
+ * Physical device callbacks for vfio_ccw
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "vfio_ccw_private.h"
+
+/*
+ * Reset the mediated device: quiesce the subchannel, re-enable it and,
+ * if both steps succeed, put the state machine into IDLE.
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+	struct subchannel *sch = private->sch;
+	int rc;
+
+	/*
+	 * TODO:
+	 * In the current stage, some things like "no I/O running" and "no
+	 * interrupt pending" are clear, but we are not sure what other state
+	 * we need to care about.
+	 * There are still a lot more instructions that need to be handled.
+	 * We should come back here later.
+	 */
+	rc = vfio_ccw_sch_quiesce(sch);
+	if (rc)
+		return rc;
+
+	rc = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+	if (rc)
+		return rc;
+
+	private->state = VFIO_CCW_STATE_IDLE;
+
+	return 0;
+}
+
+/*
+ * VFIO IOMMU notifier callback.
+ *
+ * Only VFIO_IOMMU_NOTIFY_DMA_UNMAP is handled (everything else yields
+ * NOTIFY_DONE). If the unmapped iova is pinned by the current channel
+ * program, the device is reset and the channel program is freed.
+ * Returns NOTIFY_BAD when the reset fails, NOTIFY_OK otherwise.
+ */
+static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
+				  unsigned long action,
+				  void *data)
+{
+	struct vfio_ccw_private *private =
+		container_of(nb, struct vfio_ccw_private, nb);
+	struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+	if (action != VFIO_IOMMU_NOTIFY_DMA_UNMAP)
+		return NOTIFY_DONE;
+
+	/*
+	 * Vendor drivers MUST unpin pages in response to an
+	 * invalidation.
+	 */
+	if (!cp_iova_pinned(&private->cp, unmap->iova))
+		return NOTIFY_OK;
+
+	if (vfio_ccw_mdev_reset(private->mdev))
+		return NOTIFY_BAD;
+
+	cp_free(&private->cp);
+
+	return NOTIFY_OK;
+}
+
+/* "name" mdev type attribute: prints a fixed description string. */
+static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+ return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
+}
+MDEV_TYPE_ATTR_RO(name);
+
+/* "device_api" mdev type attribute: prints VFIO_DEVICE_API_CCW_STRING. */
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+/*
+ * "available_instances" mdev type attribute: prints the current value of
+ * the private->avail counter of the device.
+ */
+static ssize_t available_instances_show(struct kobject *kobj,
+ struct device *dev, char *buf)
+{
+ struct vfio_ccw_private *private = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&private->avail));
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+/* Attributes exposed for the mdev type: name, device_api, available_instances. */
+static struct attribute *mdev_types_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+/* The single supported mdev type, named "io". */
+static struct attribute_group mdev_type_group = {
+ .name = "io",
+ .attrs = mdev_types_attrs,
+};
+
+/* NULL-terminated list of supported type groups, used by vfio_ccw_mdev_ops. */
+struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_group,
+ NULL,
+};
+
+/*
+ * Create callback: attach @mdev to the parent's private data.
+ * Returns -ENODEV if the device is in the NOT_OPER state, -EPERM if
+ * atomic_dec_if_positive() on private->avail yields a negative result,
+ * 0 on success (state becomes IDLE).
+ */
+static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+
+ if (private->state == VFIO_CCW_STATE_NOT_OPER)
+ return -ENODEV;
+
+ /* Claim an available instance before touching the private data. */
+ if (atomic_dec_if_positive(&private->avail) < 0)
+ return -EPERM;
+
+ private->mdev = mdev;
+ private->state = VFIO_CCW_STATE_IDLE;
+
+ return 0;
+}
+
+/*
+ * Remove callback: detach the mediated device from the parent's private
+ * data and give the instance back to private->avail. Always returns 0.
+ */
+static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private =
+		dev_get_drvdata(mdev_parent_dev(mdev));
+
+	switch (private->state) {
+	case VFIO_CCW_STATE_NOT_OPER:
+	case VFIO_CCW_STATE_STANDBY:
+		/* Nothing to reset in these states. */
+		break;
+	default:
+		if (!vfio_ccw_mdev_reset(mdev))
+			private->state = VFIO_CCW_STATE_STANDBY;
+		/* The state will be NOT_OPER on error. */
+		break;
+	}
+
+	private->mdev = NULL;
+	atomic_inc(&private->avail);
+
+	return 0;
+}
+
+/*
+ * Open callback: register private->nb, with vfio_ccw_mdev_notifier as its
+ * handler, for VFIO_IOMMU_NOTIFY_DMA_UNMAP events.
+ * Returns the result of vfio_register_notifier().
+ */
+static int vfio_ccw_mdev_open(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+ unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+ private->nb.notifier_call = vfio_ccw_mdev_notifier;
+
+ return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &events, &private->nb);
+}
+
+/*
+ * Release callback: unregister the IOMMU notifier that
+ * vfio_ccw_mdev_open() registered.
+ */
+void vfio_ccw_mdev_release(struct mdev_device *mdev)
+{
+ struct vfio_ccw_private *private =
+ dev_get_drvdata(mdev_parent_dev(mdev));
+
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &private->nb);
+}
+
+/*
+ * Read callback: copy @count bytes starting at offset *@ppos of the
+ * device's ccw_io_region to userspace.
+ * Returns @count on success, -EINVAL if the request extends past the end
+ * of the region, -EFAULT if the copy to userspace fails.
+ * *@ppos is not advanced.
+ */
+static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
+ char __user *buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct vfio_ccw_private *private;
+ struct ccw_io_region *region;
+
+ if (*ppos + count > sizeof(*region))
+ return -EINVAL;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ region = &private->io_region;
+ if (copy_to_user(buf, (void *)region + *ppos, count))
+ return -EFAULT;
+
+ return count;
+}
+
+/*
+ * Write callback: copy @count bytes from userspace into the device's
+ * ccw_io_region at offset *@ppos and submit the region as an I/O request
+ * to the state machine.
+ *
+ * Returns @count on success, -EINVAL if the request extends past the end
+ * of the region, -EACCES if the device is not IDLE, -EFAULT if the copy
+ * from userspace fails, or the negative error code the state machine
+ * stored in io_region.ret_code.
+ */
+static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
+				   const char __user *buf,
+				   size_t count,
+				   loff_t *ppos)
+{
+	struct vfio_ccw_private *private;
+	struct ccw_io_region *region;
+
+	if (*ppos + count > sizeof(*region))
+		return -EINVAL;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+	if (private->state != VFIO_CCW_STATE_IDLE)
+		return -EACCES;
+
+	region = &private->io_region;
+	if (copy_from_user((void *)region + *ppos, buf, count))
+		return -EFAULT;
+
+	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
+	if (region->ret_code != 0) {
+		private->state = VFIO_CCW_STATE_IDLE;
+		/*
+		 * ret_code is a __u32 that carries a negative errno. Convert
+		 * through int so the value is sign-extended to ssize_t instead
+		 * of being returned as a large positive byte count.
+		 */
+		return (int)region->ret_code;
+	}
+
+	return count;
+}
+
+/*
+ * Fill @info for VFIO_DEVICE_GET_INFO: CCW and RESET flags plus the
+ * fixed region and IRQ counts. Always returns 0.
+ */
+static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
+{
+ info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
+ info->num_regions = VFIO_CCW_NUM_REGIONS;
+ info->num_irqs = VFIO_CCW_NUM_IRQS;
+
+ return 0;
+}
+
+/*
+ * Fill @info for VFIO_DEVICE_GET_REGION_INFO.
+ * Only VFIO_CCW_CONFIG_REGION_INDEX is known: a readable and writable
+ * region at offset 0 with the size of struct ccw_io_region.
+ * Returns 0 on success, -EINVAL for any other index.
+ * @cap_type_id and @cap_type are not used.
+ */
+static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
+					 u16 *cap_type_id,
+					 void **cap_type)
+{
+	if (info->index != VFIO_CCW_CONFIG_REGION_INDEX)
+		return -EINVAL;
+
+	info->offset = 0;
+	info->size = sizeof(struct ccw_io_region);
+	info->flags = VFIO_REGION_INFO_FLAG_READ
+		      | VFIO_REGION_INFO_FLAG_WRITE;
+
+	return 0;
+}
+
+/*
+ * Fill @info for VFIO_DEVICE_GET_IRQ_INFO.
+ * Only VFIO_CCW_IO_IRQ_INDEX is known: one eventfd-capable interrupt.
+ * Returns 0 on success, -EINVAL for any other index.
+ */
+int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+{
+ if (info->index != VFIO_CCW_IO_IRQ_INDEX)
+ return -EINVAL;
+
+ info->count = 1;
+ info->flags = VFIO_IRQ_INFO_EVENTFD;
+
+ return 0;
+}
+
+/*
+ * Handle the payload of VFIO_DEVICE_SET_IRQS for the I/O interrupt.
+ * @flags: flags from the vfio_irq_set header; only the TRIGGER action is
+ *         accepted
+ * @data:  userspace pointer to the data that follows the header
+ *
+ * DATA_NONE signals the installed eventfd, DATA_BOOL signals it when the
+ * byte read from @data is non-zero, DATA_EVENTFD installs (fd >= 0) or
+ * removes (fd == -1) the eventfd stored in private->io_trigger.
+ * Returns 0 on success, -EINVAL for unsupported flags or an fd below -1,
+ * -EFAULT if @data cannot be read, or the error from eventfd_ctx_fdget().
+ */
+static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
+ uint32_t flags,
+ void __user *data)
+{
+ struct vfio_ccw_private *private;
+ struct eventfd_ctx **ctx;
+
+ if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
+ return -EINVAL;
+
+ private = dev_get_drvdata(mdev_parent_dev(mdev));
+ ctx = &private->io_trigger;
+
+ switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+ case VFIO_IRQ_SET_DATA_NONE:
+ {
+ if (*ctx)
+ eventfd_signal(*ctx, 1);
+ return 0;
+ }
+ case VFIO_IRQ_SET_DATA_BOOL:
+ {
+ uint8_t trigger;
+
+ if (get_user(trigger, (uint8_t __user *)data))
+ return -EFAULT;
+
+ if (trigger && *ctx)
+ eventfd_signal(*ctx, 1);
+ return 0;
+ }
+ case VFIO_IRQ_SET_DATA_EVENTFD:
+ {
+ int32_t fd;
+
+ if (get_user(fd, (int32_t __user *)data))
+ return -EFAULT;
+
+ if (fd == -1) {
+ /* Remove the current trigger, if any. */
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+ *ctx = NULL;
+ } else if (fd >= 0) {
+ struct eventfd_ctx *efdctx;
+
+ /* Look up the new context first so the old one survives a failure. */
+ efdctx = eventfd_ctx_fdget(fd);
+ if (IS_ERR(efdctx))
+ return PTR_ERR(efdctx);
+
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+
+ *ctx = efdctx;
+ } else
+ return -EINVAL;
+
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * ioctl callback of the mediated device.
+ *
+ * Supports VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO,
+ * VFIO_DEVICE_GET_IRQ_INFO, VFIO_DEVICE_SET_IRQS and VFIO_DEVICE_RESET.
+ * Returns 0 on success, -EFAULT if userspace memory cannot be read or
+ * written, -EINVAL for a too small argsz or an invalid index, -ENOTTY for
+ * an unknown command, or the error of the per-command helper.
+ */
+static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
+				   unsigned int cmd,
+				   unsigned long arg)
+{
+	int ret = 0;
+	unsigned long minsz;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_device_info(&info);
+		if (ret)
+			return ret;
+
+		/*
+		 * copy_to_user() returns the number of bytes that could not
+		 * be copied, not an errno; map any failure to -EFAULT.
+		 */
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
+						    &cap_type);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS)
+			return -EINVAL;
+
+		ret = vfio_ccw_mdev_get_irq_info(&info);
+		if (ret)
+			return ret;
+
+		if (info.count == -1)
+			return -EINVAL;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		size_t data_size;
+		void __user *data;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		ret = vfio_set_irqs_validate_and_prepare(&hdr, 1,
+							 VFIO_CCW_NUM_IRQS,
+							 &data_size);
+		if (ret)
+			return ret;
+
+		/* The payload follows the fixed-size header in user memory. */
+		data = (void __user *)(arg + minsz);
+		return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data);
+	}
+	case VFIO_DEVICE_RESET:
+		return vfio_ccw_mdev_reset(mdev);
+	default:
+		return -ENOTTY;
+	}
+}
+
+/* mdev parent operations, passed to mdev_register_device() in vfio_ccw_mdev_reg(). */
+static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
+ .owner = THIS_MODULE,
+ .supported_type_groups = mdev_type_groups,
+ .create = vfio_ccw_mdev_create,
+ .remove = vfio_ccw_mdev_remove,
+ .open = vfio_ccw_mdev_open,
+ .release = vfio_ccw_mdev_release,
+ .read = vfio_ccw_mdev_read,
+ .write = vfio_ccw_mdev_write,
+ .ioctl = vfio_ccw_mdev_ioctl,
+};
+
+/*
+ * Register the subchannel's device as an mdev parent using
+ * vfio_ccw_mdev_ops. Returns the result of mdev_register_device().
+ */
+int vfio_ccw_mdev_reg(struct subchannel *sch)
+{
+ return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops);
+}
+
+/* Unregister the subchannel's device as an mdev parent. */
+void vfio_ccw_mdev_unreg(struct subchannel *sch)
+{
+ mdev_unregister_device(&sch->dev);
+}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
new file mode 100644
index 000000000000..fc0f01c16ef9
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -0,0 +1,96 @@
+/*
+ * Private stuff for vfio_ccw driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_PRIVATE_H_
+#define _VFIO_CCW_PRIVATE_H_
+
+#include <linux/completion.h>
+#include <linux/eventfd.h>
+#include <linux/workqueue.h>
+#include <linux/vfio_ccw.h>
+
+#include "css.h"
+#include "vfio_ccw_cp.h"
+
+/**
+ * struct vfio_ccw_private
+ * @sch: pointer to the subchannel
+ * @state: internal state of the device (a value of enum vfio_ccw_state,
+ *         used as index into vfio_ccw_jumptable)
+ * @completion: synchronization helper of the I/O completion; may be NULL
+ * @avail: available for creating a mediated device
+ * @mdev: pointer to the mediated device; NULL while none is attached
+ * @nb: notifier for vfio events
+ * @io_region: MMIO region to input/output I/O arguments/results
+ * @cp: channel program for the current I/O operation
+ * @irb: irb info received from interrupt
+ * @scsw: scsw info
+ * @io_trigger: eventfd ctx for signaling userspace I/O results
+ * @io_work: work for deferral process of I/O handling
+ */
+struct vfio_ccw_private {
+ struct subchannel *sch;
+ int state;
+ struct completion *completion;
+ atomic_t avail;
+ struct mdev_device *mdev;
+ struct notifier_block nb;
+ struct ccw_io_region io_region;
+
+ struct channel_program cp;
+ struct irb irb;
+ union scsw scsw;
+
+ struct eventfd_ctx *io_trigger;
+ struct work_struct io_work;
+} __aligned(8);
+
+extern int vfio_ccw_mdev_reg(struct subchannel *sch);
+extern void vfio_ccw_mdev_unreg(struct subchannel *sch);
+
+extern int vfio_ccw_sch_quiesce(struct subchannel *sch);
+
+/*
+ * States of the device statemachine.
+ * The values are used as the first index of vfio_ccw_jumptable, so
+ * NR_VFIO_CCW_STATES must remain the last element.
+ */
+enum vfio_ccw_state {
+ VFIO_CCW_STATE_NOT_OPER,
+ VFIO_CCW_STATE_STANDBY,
+ VFIO_CCW_STATE_IDLE,
+ VFIO_CCW_STATE_BOXED,
+ VFIO_CCW_STATE_BUSY,
+ /* last element! */
+ NR_VFIO_CCW_STATES
+};
+
+/*
+ * Asynchronous events of the device statemachine.
+ * The values are used as the second index of vfio_ccw_jumptable, so
+ * NR_VFIO_CCW_EVENTS must remain the last element.
+ */
+enum vfio_ccw_event {
+ VFIO_CCW_EVENT_NOT_OPER,
+ VFIO_CCW_EVENT_IO_REQ,
+ VFIO_CCW_EVENT_INTERRUPT,
+ /* last element! */
+ NR_VFIO_CCW_EVENTS
+};
+
+/*
+ * Action called through jumptable.
+ * vfio_ccw_jumptable holds one action per [state][event] pair.
+ */
+typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
+extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
+
+/*
+ * Dispatch @event to the action registered in vfio_ccw_jumptable for the
+ * current state of @private. Neither the state nor @event is range-checked
+ * here.
+ */
+static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
+ int event)
+{
+ vfio_ccw_jumptable[private->state][event](private, event);
+}
+
+extern struct workqueue_struct *vfio_ccw_work_q;
+
+#endif
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 058db724b5a2..ea86da8c75f9 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -80,7 +80,7 @@ struct secaeskeytoken {
* token. If keybitsize is given, the bitsize of the key is
* also checked. Returns 0 on success or errno value on failure.
*/
-static int check_secaeskeytoken(u8 *token, int keybitsize)
+static int check_secaeskeytoken(const u8 *token, int keybitsize)
{
struct secaeskeytoken *t = (struct secaeskeytoken *) token;
@@ -1004,6 +1004,53 @@ int pkey_skey2pkey(const struct pkey_seckey *seckey,
EXPORT_SYMBOL(pkey_skey2pkey);
/*
+ * Verify key and give back some info about the key.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+		   u16 *pcardnr, u16 *pdomain,
+		   u16 *pkeysize, u32 *pattributes)
+{
+	/* The key is only read here, so keep the const qualifier. */
+	const struct secaeskeytoken *t =
+		(const struct secaeskeytoken *) seckey;
+	u16 cardnr, domain;
+	u64 mkvp[2];
+	int rc;
+
+	/* check the secure key for valid AES secure key */
+	rc = check_secaeskeytoken((const u8 *) seckey, 0);
+	if (rc)
+		goto out;
+	/* The output pointers are optional; NULL ones are skipped. */
+	if (pattributes)
+		*pattributes = PKEY_VERIFY_ATTR_AES;
+	if (pkeysize)
+		*pkeysize = t->bitsize;
+
+	/* try to find a card which can handle this key */
+	rc = pkey_findcard(seckey, &cardnr, &domain, 1);
+	if (rc)
+		goto out;
+
+	/* check mkvp for old mkvp match */
+	rc = mkvp_cache_fetch(cardnr, domain, mkvp);
+	if (rc)
+		goto out;
+	if (t->mkvp == mkvp[1]) {
+		DEBUG_DBG("pkey_verifykey secure key has old mkvp\n");
+		if (pattributes)
+			*pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP;
+	}
+
+	if (pcardnr)
+		*pcardnr = cardnr;
+	if (pdomain)
+		*pdomain = domain;
+
+out:
+	DEBUG_DBG("pkey_verifykey rc=%d\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(pkey_verifykey);
+
+/*
* File io functions
*/
@@ -1104,6 +1151,21 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return -EFAULT;
break;
}
+ case PKEY_VERIFYKEY: {
+ struct pkey_verifykey __user *uvk = (void __user *) arg;
+ struct pkey_verifykey kvk;
+
+ if (copy_from_user(&kvk, uvk, sizeof(kvk)))
+ return -EFAULT;
+ rc = pkey_verifykey(&kvk.seckey, &kvk.cardnr, &kvk.domain,
+ &kvk.keysize, &kvk.attributes);
+ DEBUG_DBG("pkey_ioctl pkey_verifykey()=%d\n", rc);
+ if (rc)
+ break;
+ if (copy_to_user(uvk, &kvk, sizeof(kvk)))
+ return -EFAULT;
+ break;
+ }
default:
/* unknown/unsupported ioctl cmd */
return -ENOTTY;
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..176b6cb1008d 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,8 @@ typedef struct elf64_shdr {
#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
+#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
#define NT_ARM_TLS 0x401 /* ARM TLS register */
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 519eff362c1c..ae461050661a 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -198,6 +198,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */
#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
+#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
};
@@ -212,6 +213,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_API_PCI_STRING "vfio-pci"
#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform"
#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba"
+#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
@@ -446,6 +448,22 @@ enum {
VFIO_PCI_NUM_IRQS
};
+/*
+ * The vfio-ccw bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+/* Region indexes of a vfio-ccw device; VFIO_CCW_NUM_REGIONS is their count. */
+enum {
+ VFIO_CCW_CONFIG_REGION_INDEX,
+ VFIO_CCW_NUM_REGIONS
+};
+
+/* IRQ indexes of a vfio-ccw device; VFIO_CCW_NUM_IRQS is their count. */
+enum {
+ VFIO_CCW_IO_IRQ_INDEX,
+ VFIO_CCW_NUM_IRQS
+};
+
/**
* VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
* struct vfio_pci_hot_reset_info)
diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
new file mode 100644
index 000000000000..34a7f6f9e065
--- /dev/null
+++ b/include/uapi/linux/vfio_ccw.h
@@ -0,0 +1,24 @@
+/*
+ * Interfaces for vfio-ccw
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_H_
+#define _VFIO_CCW_H_
+
+#include <linux/types.h>
+
+/*
+ * I/O region exchanged between userspace and the vfio-ccw driver.
+ * @orb_area:  raw orb of the request (ORB_AREA_SIZE bytes)
+ * @scsw_area: raw scsw of the request (SCSW_AREA_SIZE bytes)
+ * @irb_area:  raw irb area (IRB_AREA_SIZE bytes); presumably filled with
+ *             the I/O result - confirm against the I/O work handler
+ * @ret_code:  result code of the request; the driver stores negative
+ *             errno values here even though the field is unsigned
+ */
+struct ccw_io_region {
+#define ORB_AREA_SIZE 12
+ __u8 orb_area[ORB_AREA_SIZE];
+#define SCSW_AREA_SIZE 12
+ __u8 scsw_area[SCSW_AREA_SIZE];
+#define IRB_AREA_SIZE 96
+ __u8 irb_area[IRB_AREA_SIZE];
+ __u32 ret_code;
+} __packed;
+
+#endif