From ee89686631174fa576e61400ac3580722edac120 Mon Sep 17 00:00:00 2001 From: Akshay Bhat Date: Wed, 27 Apr 2016 16:23:34 -0400 Subject: dt-bindings: imx: ldb: Add ddc-i2c-bus property Document the ddc-i2c-bus property used by imx-ldb driver to read EDID information via I2C interface. Signed-off-by: Akshay Bhat Acked-by: Rob Herring Signed-off-by: Philipp Zabel --- Documentation/devicetree/bindings/display/imx/ldb.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/imx/ldb.txt b/Documentation/devicetree/bindings/display/imx/ldb.txt index 0a175d991b52..a407462c885e 100644 --- a/Documentation/devicetree/bindings/display/imx/ldb.txt +++ b/Documentation/devicetree/bindings/display/imx/ldb.txt @@ -62,6 +62,7 @@ Required properties: display-timings are used instead. Optional properties (required if display-timings are used): + - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing - display-timings : A node that describes the display timings as defined in Documentation/devicetree/bindings/display/display-timing.txt. - fsl,data-mapping : should be "spwg" or "jeida" -- cgit v1.2.3 From eae1760fc838d5f569a80939f72483f02ac049c5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:26:52 -0400 Subject: doc: update/fixup dma-buf related DocBook Split out dma-buf related parts into their own section, add missing files, and write a bit of overview about how it all fits together. Signed-off-by: Rob Clark Signed-off-by: Sumit Semwal --- Documentation/DocBook/device-drivers.tmpl | 36 +++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index de79efdad46c..8c68768ebee5 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -128,16 +128,44 @@ X!Edrivers/base/interface.c !Edrivers/base/platform.c !Edrivers/base/bus.c - Device Drivers DMA Management + + Buffer Sharing and Synchronization + + The dma-buf subsystem provides the framework for sharing buffers + for hardware (DMA) access across multiple device drivers and + subsystems, and for synchronizing asynchronous hardware access. + + + This is used, for example, by drm "prime" multi-GPU support, but + is of course not limited to GPU use cases. + + + The three main components of this are: (1) dma-buf, representing + a sg_table and exposed to userspace as a file descriptor to allow + passing between devices, (2) fence, which provides a mechanism + to signal when one device as finished access, and (3) reservation, + which manages the shared or exclusive fence(s) associated with + the buffer. + + dma-buf !Edrivers/dma-buf/dma-buf.c +!Iinclude/linux/dma-buf.h + + reservation +!Pdrivers/dma-buf/reservation.c Reservation Object Overview +!Edrivers/dma-buf/reservation.c +!Iinclude/linux/reservation.h + + fence !Edrivers/dma-buf/fence.c -!Edrivers/dma-buf/seqno-fence.c !Iinclude/linux/fence.h +!Edrivers/dma-buf/seqno-fence.c !Iinclude/linux/seqno-fence.h -!Edrivers/dma-buf/reservation.c -!Iinclude/linux/reservation.h !Edrivers/dma-buf/sync_file.c !Iinclude/linux/sync_file.h + + + Device Drivers DMA Management !Edrivers/base/dma-coherent.c !Edrivers/base/dma-mapping.c -- cgit v1.2.3 From fbf8f40e1658cb2f17452dbd3c708e329c5d27e0 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Wed, 25 May 2016 15:29:20 +0200 Subject: irqchip/gicv3-its: numa: Enable workaround for Cavium thunderx erratum 23144 The erratum fixes the hang of ITS SYNC command by avoiding inter node io and collections/cpu mapping on thunderx dual-socket platform. This fix is only applicable for Cavium's ThunderX dual-socket platform. Reviewed-by: Robert Richter Signed-off-by: Ganapatrao Kulkarni Signed-off-by: Robert Richter Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 9 +++++++ drivers/irqchip/irq-gic-v3-its.c | 49 ++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index c6938e50e71f..4da60b463995 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -56,6 +56,7 @@ stable kernels. | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 76747d92bc72..57a9f67971d3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -426,6 +426,15 @@ config CAVIUM_ERRATUM_22375 If unsure, say Y. +config CAVIUM_ERRATUM_23144 + bool "Cavium erratum 23144: ITS SYNC hang on dual socket system" + depends on NUMA + default y + help + ITS SYNC command hang for cross node io and collections/cpu mapping. + + If unsure, say Y. + config CAVIUM_ERRATUM_23154 bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" default y diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6bd881be24ea..5eb1f9e17a98 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -41,6 +41,7 @@ #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0) #define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1) +#define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) @@ -82,6 +83,7 @@ struct its_node { u64 flags; u32 ite_size; u32 device_ids; + int numa_node; }; #define ITS_ITT_ALIGN SZ_256 @@ -613,11 +615,23 @@ static void its_unmask_irq(struct irq_data *d) static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { - unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); + unsigned int cpu; + const struct cpumask *cpu_mask = cpu_online_mask; struct its_device *its_dev = irq_data_get_irq_chip_data(d); struct its_collection *target_col; u32 id = its_get_event_id(d); + /* lpi cannot be routed to a redistributor that is on a foreign node */ + if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) { + if (its_dev->its->numa_node >= 0) { + cpu_mask = cpumask_of_node(its_dev->its->numa_node); + if (!cpumask_intersects(mask_val, cpu_mask)) + return -EINVAL; + } + } + + cpu = cpumask_any_and(mask_val, cpu_mask); + if (cpu >= nr_cpu_ids) return -EINVAL; @@ -1101,6 +1115,16 @@ static void its_cpu_init_collection(void) list_for_each_entry(its, &its_nodes, entry) { u64 target; + /* avoid cross node collections and its mapping */ + if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) { + struct device_node *cpu_node; + + cpu_node = of_get_cpu_node(cpu, NULL); + if (its->numa_node != NUMA_NO_NODE && + its->numa_node != of_node_to_nid(cpu_node)) + continue; + } + /* * We now have to bind each collection to its target * redistributor. @@ -1351,9 +1375,14 @@ static void its_irq_domain_activate(struct irq_domain *domain, { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); + const struct cpumask *cpu_mask = cpu_online_mask; + + /* get the cpu_mask of local node */ + if (its_dev->its->numa_node >= 0) + cpu_mask = cpumask_of_node(its_dev->its->numa_node); /* Bind the LPI to the first possible CPU */ - its_dev->event_map.col_map[event] = cpumask_first(cpu_online_mask); + its_dev->event_map.col_map[event] = cpumask_first(cpu_mask); /* Map the GIC IRQ and event to the device */ its_send_mapvi(its_dev, d->hwirq, event); @@ -1443,6 +1472,13 @@ static void __maybe_unused its_enable_quirk_cavium_22375(void *data) its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_22375; } +static void __maybe_unused its_enable_quirk_cavium_23144(void *data) +{ + struct its_node *its = data; + + its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -1451,6 +1487,14 @@ static const struct gic_quirk its_quirks[] = { .mask = 0xffff0fff, .init = its_enable_quirk_cavium_22375, }, +#endif +#ifdef CONFIG_CAVIUM_ERRATUM_23144 + { + .desc = "ITS: Cavium erratum 23144", + .iidr = 0xa100034c, /* ThunderX pass 1.x */ + .mask = 0xffff0fff, + .init = its_enable_quirk_cavium_23144, + }, #endif { } @@ -1514,6 +1558,7 @@ static int __init its_probe(struct device_node *node, its->base = its_base; its->phys_base = res.start; its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1; + its->numa_node = of_node_to_nid(node); its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL); if (!its->cmd_base) { -- cgit v1.2.3 From 4693fc734d675c5518ea9bd4c9623db45bc37402 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 26 May 2016 23:38:12 +0200 Subject: KEYS: Add placeholder for KDF usage with DH The values computed during Diffie-Hellman key exchange are often used in combination with key derivation functions to create cryptographic keys. Add a placeholder for a later implementation to configure a key derivation function that will transform the Diffie-Hellman result returned by the KEYCTL_DH_COMPUTE command. [This patch was stripped down from a patch produced by Mat Martineau that had a bug in the compat code - so for the moment Stephan's patch simply requires that the placeholder argument must be NULL] Original-signed-off-by: Mat Martineau Signed-off-by: Stephan Mueller Signed-off-by: David Howells Signed-off-by: James Morris --- Documentation/security/keys.txt | 5 ++++- security/keys/compat.c | 2 +- security/keys/dh.c | 8 +++++++- security/keys/internal.h | 5 +++-- security/keys/keyctl.c | 4 ++-- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 20d05719bceb..3849814bfe6d 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -826,7 +826,8 @@ The keyctl syscall functions are: (*) Compute a Diffie-Hellman shared secret or public key long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params, - char *buffer, size_t buflen); + char *buffer, size_t buflen, + void *reserved); The params struct contains serial numbers for three keys: @@ -843,6 +844,8 @@ The keyctl syscall functions are: public key. If the base is the remote public key, the result is the shared secret. + The reserved argument must be set to NULL. + The buffer length must be at least the length of the prime, or zero. If the buffer length is nonzero, the length of the result is diff --git a/security/keys/compat.c b/security/keys/compat.c index c8783b3b628c..36c80bf5b89c 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -134,7 +134,7 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, case KEYCTL_DH_COMPUTE: return keyctl_dh_compute(compat_ptr(arg2), compat_ptr(arg3), - arg4); + arg4, compat_ptr(arg5)); default: return -EOPNOTSUPP; diff --git a/security/keys/dh.c b/security/keys/dh.c index 880505a4b9f1..531ed2ec132f 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -78,7 +78,8 @@ error: } long keyctl_dh_compute(struct keyctl_dh_params __user *params, - char __user *buffer, size_t buflen) + char __user *buffer, size_t buflen, + void __user *reserved) { long ret; MPI base, private, prime, result; @@ -97,6 +98,11 @@ long keyctl_dh_compute(struct keyctl_dh_params __user *params, goto out; } + if (reserved) { + ret = -EINVAL; + goto out; + } + keylen = mpi_from_key(pcopy.prime, buflen, &prime); if (keylen < 0 || !prime) { /* buflen == 0 may be used to query the required buffer size, diff --git a/security/keys/internal.h b/security/keys/internal.h index 8ec7a528365d..a705a7d92ad7 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -260,10 +260,11 @@ static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) #ifdef CONFIG_KEY_DH_OPERATIONS extern long keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *, - size_t); + size_t, void __user *); #else static inline long keyctl_dh_compute(struct keyctl_dh_params __user *params, - char __user *buffer, size_t buflen) + char __user *buffer, size_t buflen, + void __user *reserved) { return -EOPNOTSUPP; } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3b135a0af344..d580ad06b792 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1688,8 +1688,8 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_DH_COMPUTE: return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2, - (char __user *) arg3, - (size_t) arg4); + (char __user *) arg3, (size_t) arg4, + (void __user *) arg5); default: return -EOPNOTSUPP; -- cgit v1.2.3 From d8bae33dddc03dc652e1d8cfceebf4f753939de7 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 3 Jun 2016 14:55:36 -0700 Subject: kdump: fix dmesg gdbmacro to work with record based printk Commit 7ff9554bb578 ("printk: convert byte-buffer to variable-length record buffer") introduced a record based printk buffer. Modify gdbmacros.txt to parse this new structure so dmesg will work properly. Link: http://lkml.kernel.org/r/1463515794-1599-1-git-send-email-minyard@acm.org Signed-off-by: Corey Minyard Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kdump/gdbmacros.txt | 93 ++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt index 35f6a982a0d5..220d0a80ca2c 100644 --- a/Documentation/kdump/gdbmacros.txt +++ b/Documentation/kdump/gdbmacros.txt @@ -170,21 +170,92 @@ document trapinfo address the kernel panicked. end +define dump_log_idx + set $idx = $arg0 + if ($argc > 1) + set $prev_flags = $arg1 + else + set $prev_flags = 0 + end + set $msg = ((struct printk_log *) (log_buf + $idx)) + set $prefix = 1 + set $newline = 1 + set $log = log_buf + $idx + sizeof(*$msg) -define dmesg - set $i = 0 - set $end_idx = (log_end - 1) & (log_buf_len - 1) + # prev & LOG_CONT && !(msg->flags & LOG_PREIX) + if (($prev_flags & 8) && !($msg->flags & 4)) + set $prefix = 0 + end + + # msg->flags & LOG_CONT + if ($msg->flags & 8) + # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) + if (($prev_flags & 8) && !($prev_flags & 2)) + set $prefix = 0 + end + # (!(msg->flags & LOG_NEWLINE)) + if (!($msg->flags & 2)) + set $newline = 0 + end + end + + if ($prefix) + printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 + end + if ($msg->text_len != 0) + eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len + end + if ($newline) + printf "\n" + end + if ($msg->dict_len > 0) + set $dict = $log + $msg->text_len + set $idx = 0 + set $line = 1 + while ($idx < $msg->dict_len) + if ($line) + printf " " + set $line = 0 + end + set $c = $dict[$idx] + if ($c == '\0') + printf "\n" + set $line = 1 + else + if ($c < ' ' || $c >= 127 || $c == '\\') + printf "\\x%02x", $c + else + printf "%c", $c + end + end + set $idx = $idx + 1 + end + printf "\n" + end +end +document dump_log_idx + Dump a single log given its index in the log buffer. The first + parameter is the index into log_buf, the second is optional and + specified the previous log buffer's flags, used for properly + formatting continued lines. +end - while ($i < logged_chars) - set $idx = (log_end - 1 - logged_chars + $i) & (log_buf_len - 1) +define dmesg + set $i = log_first_idx + set $end_idx = log_first_idx + set $prev_flags = 0 - if ($idx + 100 <= $end_idx) || \ - ($end_idx <= $idx && $idx + 100 < log_buf_len) - printf "%.100s", &log_buf[$idx] - set $i = $i + 100 + while (1) + set $msg = ((struct printk_log *) (log_buf + $i)) + if ($msg->len == 0) + set $i = 0 else - printf "%c", log_buf[$idx] - set $i = $i + 1 + dump_log_idx $i $prev_flags + set $i = $i + $msg->len + set $prev_flags = $msg->flags + end + if ($i == $end_idx) + loop_break end end end -- cgit v1.2.3 From eedf265aa003b4781de24cfed40a655a664457e6 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Thu, 2 Jun 2016 10:29:47 -0500 Subject: devpts: Make each mount of devpts an independent filesystem. The /dev/ptmx device node is changed to lookup the directory entry "pts" in the same directory as the /dev/ptmx device node was opened in. If there is a "pts" entry and that entry is a devpts filesystem /dev/ptmx uses that filesystem. Otherwise the open of /dev/ptmx fails. The DEVPTS_MULTIPLE_INSTANCES configuration option is removed, so that userspace can now safely depend on each mount of devpts creating a new instance of the filesystem. Each mount of devpts is now a separate and equal filesystem. Reserved ttys are now available to all instances of devpts where the mounter is in the initial mount namespace. A new vfs helper path_pts is introduced that finds a directory entry named "pts" in the directory of the passed in path, and changes the passed in path to point to it. The helper path_pts uses a function path_parent_directory that was factored out of follow_dotdot. In the implementation of devpts: - devpts_mnt is killed as it is no longer meaningful if all mounts of devpts are equal. - pts_sb_from_inode is replaced by just inode->i_sb as all cached inodes in the tty layer are now from the devpts filesystem. - devpts_add_ref is rolled into the new function devpts_ptmx. And the unnecessary inode hold is removed. - devpts_del_ref is renamed devpts_release and reduced to just a deacrivate_super. - The newinstance mount option continues to be accepted but is now ignored. In devpts_fs.h definitions for when !CONFIG_UNIX98_PTYS are removed as they are never used. Documentation/filesystems/devices.txt is updated to describe the current situation. This has been verified to work properly on openwrt-15.05, centos5, centos6, centos7, debian-6.0.2, debian-7.9, debian-8.2, ubuntu-14.04.3, ubuntu-15.10, fedora23, magia-5, mint-17.3, opensuse-42.1, slackware-14.1, gentoo-20151225 (13.0?), archlinux-2015-12-01. With the caveat that on centos6 and on slackware-14.1 that there wind up being two instances of the devpts filesystem mounted on /dev/pts, the lower copy does not end up getting used. Signed-off-by: "Eric W. Biederman" Cc: Greg KH Cc: Peter Hurley Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: One Thousand Gnomes Cc: Jann Horn Cc: Jiri Slaby Cc: Florian Weimer Cc: Konstantin Khlebnikov Signed-off-by: Linus Torvalds --- Documentation/filesystems/devpts.txt | 145 +++----------------------- drivers/tty/Kconfig | 11 -- drivers/tty/pty.c | 15 +-- fs/devpts/inode.c | 191 ++++++++++------------------------- fs/namei.c | 49 +++++++-- include/linux/devpts_fs.h | 9 +- include/linux/namei.h | 2 + 7 files changed, 126 insertions(+), 296 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt index 30d2fcb32f72..9f94fe276dea 100644 --- a/Documentation/filesystems/devpts.txt +++ b/Documentation/filesystems/devpts.txt @@ -1,141 +1,26 @@ +Each mount of the devpts filesystem is now distinct such that ptys +and their indicies allocated in one mount are independent from ptys +and their indicies in all other mounts. -To support containers, we now allow multiple instances of devpts filesystem, -such that indices of ptys allocated in one instance are independent of indices -allocated in other instances of devpts. +All mounts of the devpts filesystem now create a /dev/pts/ptmx node +with permissions 0000. -To preserve backward compatibility, this support for multiple instances is -enabled only if: +To retain backwards compatibility the a ptmx device node (aka any node +created with "mknod name c 5 2") when opened will look for an instance +of devpts under the name "pts" in the same directory as the ptmx device +node. - - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and - - '-o newinstance' mount option is specified while mounting devpts - -IOW, devpts now supports both single-instance and multi-instance semantics. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and -this referred to as the "legacy" mode. In this mode, the new mount options -(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message -on console. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the -'newinstance' option (as in current start-up scripts) the new mount binds -to the initial kernel mount of devpts. This mode is referred to as the -'single-instance' mode and the current, single-instance semantics are -preserved, i.e PTYs are common across the system. - -The only difference between this single-instance mode and the legacy mode -is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which -can safely be ignored. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified, -the mount is considered to be in the multi-instance mode and a new instance -of the devpts fs is created. Any ptys created in this instance are independent -of ptys in other instances of devpts. Like in the single-instance mode, the -/dev/pts/ptmx node is present. To effectively use the multi-instance mode, -open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or -bind-mount. - -Eg: A container startup script could do the following: - - $ chmod 0666 /dev/pts/ptmx - $ rm /dev/ptmx - $ ln -s pts/ptmx /dev/ptmx - $ ns_exec -cm /bin/bash - - # We are now in new container - - $ umount /dev/pts - $ mount -t devpts -o newinstance lxcpts /dev/pts - $ sshd -p 1234 - -where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs -/bin/bash in the child process. A pty created by the sshd is not visible in -the original mount of /dev/pts. +As an option instead of placing a /dev/ptmx device node at /dev/ptmx +it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or +to bind mount /dev/ptx/ptmx to /dev/ptmx. If you opt for using +the devpts filesystem in this manner devpts should be mounted with +the ptmxmode=0666, or chmod 0666 /dev/pts/ptmx should be called. Total count of pty pairs in all instances is limited by sysctls: kernel.pty.max = 4096 - global limit -kernel.pty.reserve = 1024 - reserve for initial instance +kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace kernel.pty.nr - current count of ptys Per-instance limit could be set by adding mount option "max=". This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve. In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit. - -User-space changes ------------------- - -In multi-instance mode (i.e '-o newinstance' mount option is specified at least -once), following user-space issues should be noted. - -1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored - and no change is needed to system-startup scripts. - -2. To effectively use multi-instance mode (i.e -o newinstance is specified) - administrators or startup scripts should "redirect" open of /dev/ptmx to - /dev/pts/ptmx using either a bind mount or symlink. - - $ mount -t devpts -o newinstance devpts /dev/pts - - followed by either - - $ rm /dev/ptmx - $ ln -s pts/ptmx /dev/ptmx - $ chmod 666 /dev/pts/ptmx - or - $ mount -o bind /dev/pts/ptmx /dev/ptmx - -3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it - enables better error-reporting and treats both single-instance and - multi-instance mounts similarly. - - But this method requires that system-startup scripts set the mode of - /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the - mode by, either - - - adding ptmxmode mount option to devpts entry in /etc/fstab, or - - using 'chmod 0666 /dev/pts/ptmx' - -4. If multi-instance mode mount is needed for containers, but the system - startup scripts have not yet been updated, container-startup scripts - should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single- - instance mounts. - - Or, in general, container-startup scripts should use: - - mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts - if [ ! -L /dev/ptmx ]; then - mount -o bind /dev/pts/ptmx /dev/ptmx - fi - - When all devpts mounts are multi-instance, /dev/ptmx can permanently be - a symlink to pts/ptmx and the bind mount can be ignored. - -5. A multi-instance mount that is not accompanied by the /dev/ptmx to - /dev/pts/ptmx redirection would result in an unusable/unreachable pty. - - mount -t devpts -o newinstance lxcpts /dev/pts - - immediately followed by: - - open("/dev/ptmx") - - would create a pty, say /dev/pts/7, in the initial kernel mount. - But /dev/pts/7 would be invisible in the new mount. - -6. The permissions for /dev/pts/ptmx node should be specified when mounting - /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000). - - mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts - - The permissions can be later be changed as usual with 'chmod'. - - chmod 666 /dev/pts/ptmx - -7. A mount of devpts without the 'newinstance' option results in binding to - initial kernel mount. This behavior while preserving legacy semantics, - does not provide strict isolation in a container environment. i.e by - mounting devpts without the 'newinstance' option, a container could - get visibility into the 'host' or root container's devpts. - - To workaround this and have strict isolation, all mounts of devpts, - including the mount in the root container, should use the newinstance - option. diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 82c4d2e45319..95103054c0e4 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -120,17 +120,6 @@ config UNIX98_PTYS All modern Linux systems use the Unix98 ptys. Say Y unless you're on an embedded system and want to conserve memory. -config DEVPTS_MULTIPLE_INSTANCES - bool "Support multiple instances of devpts" - depends on UNIX98_PTYS - default n - ---help--- - Enable support for multiple instances of devpts filesystem. - If you want to have isolated PTY namespaces (eg: in containers), - say Y here. Otherwise, say N. If enabled, each mount of devpts - filesystem with the '-o newinstance' option will create an - independent PTY namespace. - config LEGACY_PTYS bool "Legacy (BSD) PTY support" default y diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index dd4b8417e7f4..f856c4544eea 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -668,7 +668,7 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) else fsi = tty->link->driver_data; devpts_kill_index(fsi, tty->index); - devpts_put_ref(fsi); + devpts_release(fsi); } static const struct tty_operations ptm_unix98_ops = { @@ -733,10 +733,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_get_ref(inode, filp); - retval = -ENODEV; - if (!fsi) + fsi = devpts_acquire(filp); + if (IS_ERR(fsi)) { + retval = PTR_ERR(fsi); goto out_free_file; + } /* find a device that is not in use. */ mutex_lock(&devpts_mutex); @@ -745,7 +746,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) retval = index; if (index < 0) - goto out_put_ref; + goto out_put_fsi; mutex_lock(&tty_mutex); @@ -789,8 +790,8 @@ err_release: return retval; out: devpts_kill_index(fsi, index); -out_put_ref: - devpts_put_ref(fsi); +out_put_fsi: + devpts_release(fsi); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0b2954d7172d..37c134a132c7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = { static DEFINE_MUTEX(allocated_ptys_lock); -static struct vfsmount *devpts_mnt; - struct pts_mount_opts { int setuid; int setgid; @@ -104,7 +102,7 @@ struct pts_mount_opts { kgid_t gid; umode_t mode; umode_t ptmxmode; - int newinstance; + int reserve; int max; }; @@ -117,11 +115,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, {Opt_max, "max=%d"}, -#endif {Opt_err, NULL} }; @@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -static inline struct super_block *pts_sb_from_inode(struct inode *inode) +struct pts_fs_info *devpts_acquire(struct file *filp) { -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES - if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - return inode->i_sb; -#endif - if (!devpts_mnt) - return NULL; - return devpts_mnt->mnt_sb; + struct pts_fs_info *result; + struct path path; + struct super_block *sb; + int err; + + path = filp->f_path; + path_get(&path); + + /* Has the devpts filesystem already been found? */ + sb = path.mnt->mnt_sb; + if (sb->s_magic != DEVPTS_SUPER_MAGIC) { + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(&path); + if (err) { + result = ERR_PTR(err); + goto out; + } + + /* Is the path the root of a devpts filesystem? */ + result = ERR_PTR(-ENODEV); + sb = path.mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path.mnt->mnt_root != sb->s_root)) + goto out; + } + + /* + * pty code needs to hold extra references in case of last /dev/tty close + */ + atomic_inc(&sb->s_active); + result = DEVPTS_SB(sb); + +out: + path_put(&path); + return result; +} + +void devpts_release(struct pts_fs_info *fsi) +{ + deactivate_super(fsi->sb); } #define PARSE_MOUNT 0 @@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) /* * parse_mount_options(): * Set @opts to mount options specified in @data. If an option is not - * specified in @data, set it to its default value. The exception is - * 'newinstance' option which can only be set/cleared on a mount (i.e. - * cannot be changed during remount). + * specified in @data, set it to its default value. * * Note: @data may be NULL (in which case all options are set to default). */ @@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; opts->max = NR_UNIX98_PTY_MAX; - /* newinstance makes sense only on initial mount */ + /* Only allow instances mounted from the initial mount + * namespace to tap the reserve pool of ptys. + */ if (op == PARSE_MOUNT) - opts->newinstance = 0; + opts->reserve = + (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns); while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES case Opt_ptmxmode: if (match_octal(&args[0], &option)) return -EINVAL; opts->ptmxmode = option & S_IALLUGO; break; case Opt_newinstance: - /* newinstance makes sense only on initial mount */ - if (op == PARSE_MOUNT) - opts->newinstance = 1; break; case Opt_max: if (match_int(&args[0], &option) || @@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->max = option; break; -#endif default: pr_err("called with bogus options\n"); return -EINVAL; @@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return 0; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES static int mknod_ptmx(struct super_block *sb) { int mode; @@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } } -#else -static inline void update_ptmx_mode(struct pts_fs_info *fsi) -{ - return; -} -#endif static int devpts_remount(struct super_block *sb, int *flags, char *data) { @@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); if (opts->max < NR_UNIX98_PTY_MAX) seq_printf(seq, ",max=%d", opts->max); -#endif return 0; } @@ -410,40 +426,11 @@ fail: return -ENOMEM; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES -static int compare_init_pts_sb(struct super_block *s, void *p) -{ - if (devpts_mnt) - return devpts_mnt->mnt_sb == s; - return 0; -} - /* * devpts_mount() * - * If the '-o newinstance' mount option was specified, mount a new - * (private) instance of devpts. PTYs created in this instance are - * independent of the PTYs in other devpts instances. - * - * If the '-o newinstance' option was not specified, mount/remount the - * initial kernel mount of devpts. This type of mount gives the - * legacy, single-instance semantics. - * - * The 'newinstance' option is needed to support multiple namespace - * semantics in devpts while preserving backward compatibility of the - * current 'single-namespace' semantics. i.e all mounts of devpts - * without the 'newinstance' mount option should bind to the initial - * kernel mount, like mount_single(). - * - * Mounts with 'newinstance' option create a new, private namespace. - * - * NOTE: - * - * For single-mount semantics, devpts cannot use mount_single(), - * because mount_single()/sget() find and use the super-block from - * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and mount_single() would pick the newinstance - * super-block instead of the initial super-block. + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. */ static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, if (error) return ERR_PTR(error); - /* Require newinstance for all user namespace mounts to ensure - * the mount options are not changed. - */ - if ((current_user_ns() != &init_user_ns) && !opts.newinstance) - return ERR_PTR(-EINVAL); - - if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, flags, NULL); - else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, - NULL); - + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -491,18 +467,6 @@ out_undo_sget: return ERR_PTR(error); } -#else -/* - * This supports only the legacy single-instance semantics (no - * multiple-instance semantics) - */ -static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_single(fs_type, flags, data, devpts_fill_super); -} -#endif - static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); @@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = { .name = "devpts", .mount = devpts_mount, .kill_sb = devpts_kill_sb, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, -#endif }; /* @@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi) int index; int ida_ret; - if (!fsi) - return -ENODEV; - retry: if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; mutex_lock(&allocated_ptys_lock); - if (pty_count >= pty_limit - - (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + if (pty_count >= (pty_limit - + (fsi->mount_opts.reserve ? 0 : pty_reserve))) { mutex_unlock(&allocated_ptys_lock); return -ENOSPC; } @@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx) mutex_unlock(&allocated_ptys_lock); } -/* - * pty code needs to hold extra references in case of last /dev/tty close - */ -struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file) -{ - struct super_block *sb; - struct pts_fs_info *fsi; - - sb = pts_sb_from_inode(ptmx_inode); - if (!sb) - return NULL; - fsi = DEVPTS_SB(sb); - if (!fsi) - return NULL; - - atomic_inc(&sb->s_active); - return fsi; -} - -void devpts_put_ref(struct pts_fs_info *fsi) -{ - deactivate_super(fsi->sb); -} - /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master @@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi) struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { struct dentry *dentry; - struct super_block *sb; + struct super_block *sb = fsi->sb; struct inode *inode; struct dentry *root; struct pts_mount_opts *opts; char s[12]; - if (!fsi) - return ERR_PTR(-ENODEV); - - sb = fsi->sb; root = sb->s_root; opts = &fsi->mount_opts; @@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); - struct ctl_table_header *table; - if (!err) { - struct vfsmount *mnt; - - table = register_sysctl_table(pty_root_table); - mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); - unregister_filesystem(&devpts_fs_type); - unregister_sysctl_table(table); - } else { - devpts_mnt = mnt; - } + register_sysctl_table(pty_root_table); } return err; } diff --git a/fs/namei.c b/fs/namei.c index 4c4f95ac8aa5..6a82fb7e2127 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path) } } +static int path_parent_directory(struct path *path) +{ + struct dentry *old = path->dentry; + /* rare case of legitimate dget_parent()... */ + path->dentry = dget_parent(path->dentry); + dput(old); + if (unlikely(!path_connected(path))) + return -ENOENT; + return 0; +} + static int follow_dotdot(struct nameidata *nd) { while(1) { - struct dentry *old = nd->path.dentry; - if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; } if (nd->path.dentry != nd->path.mnt->mnt_root) { - /* rare case of legitimate dget_parent()... */ - nd->path.dentry = dget_parent(nd->path.dentry); - dput(old); - if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + int ret = path_parent_directory(&nd->path); + if (ret) + return ret; break; } if (!follow_up(&nd->path)) @@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name, } EXPORT_SYMBOL(lookup_one_len_unlocked); +#ifdef CONFIG_UNIX98_PTYS +int path_pts(struct path *path) +{ + /* Find something mounted on "pts" in the same directory as + * the input path. + */ + struct dentry *child, *parent; + struct qstr this; + int ret; + + ret = path_parent_directory(path); + if (ret) + return ret; + + parent = path->dentry; + this.name = "pts"; + this.len = 3; + child = d_hash_and_lookup(parent, &this); + if (!child) + return -ENOENT; + + path->dentry = child; + dput(parent); + follow_mount(path); + return 0; +} +#endif + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 5871f292b596..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,13 +15,12 @@ #include -struct pts_fs_info; - #ifdef CONFIG_UNIX98_PTYS -/* Look up a pts fs info and get a ref to it */ -struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); -void devpts_put_ref(struct pts_fs_info *); +struct pts_fs_info; + +struct pts_fs_info *devpts_acquire(struct file *); +void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); void devpts_kill_index(struct pts_fs_info *, int); diff --git a/include/linux/namei.h b/include/linux/namei.h index ec5ec2818a28..d3d0398f2a1b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +extern int path_pts(struct path *path); + extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, -- cgit v1.2.3