diff options
Diffstat (limited to 'Documentation')
44 files changed, 1087 insertions, 34 deletions
diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft index c2b7d1154bec..cac3930bdb04 100644 --- a/Documentation/ABI/testing/sysfs-ibft +++ b/Documentation/ABI/testing/sysfs-ibft @@ -20,4 +20,4 @@ Date: November 2007 Contact: Konrad Rzeszutek <ketuzsezr@darnok.org> Description: The /sys/firmware/ibft/ethernetX directory will contain files that expose the iSCSI Boot Firmware Table NIC data. - This can this can the IP address, MAC, and gateway of the NIC. + Usually this contains the IP address, MAC, and gateway of the NIC. diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index df2962d9e11e..8bf7c6191296 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -25,7 +25,7 @@ GENFILES := $(addprefix $(MEDIA_OBJ_DIR)/, $(MEDIA_TEMP)) PHONY += cleanmediadocs cleanmediadocs: - -@rm `find $(MEDIA_OBJ_DIR) -type l` $(GENFILES) $(OBJIMGFILES) 2>/dev/null + -@rm -f `find $(MEDIA_OBJ_DIR) -type l` $(GENFILES) $(OBJIMGFILES) 2>/dev/null $(obj)/media_api.xml: $(GENFILES) FORCE diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index 07ffc76553ba..0a2debfa68f6 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2566,6 +2566,10 @@ fields changed from _s32 to _u32. <para>Added compound control types and &VIDIOC-QUERY-EXT-CTRL;. </para> </listitem> + </orderedlist> + </section> + + <section> <title>V4L2 in Linux 3.18</title> <orderedlist> <listitem> diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 57cf5efb044d..93aa8604630e 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -324,7 +324,6 @@ tree, they need to be integration-tested. For this purpose, a special testing repository exists into which virtually all subsystem trees are pulled on an almost daily basis: http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git - http://linux.f-seidel.de/linux-next/pmwiki/ This way, the -next kernel gives a summary outlook onto what will be expected to go into the mainline kernel at the next merge period. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 482c74947de0..1fa1caa198eb 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -483,12 +483,10 @@ have been included in the discussion 14) Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: -If this patch fixes a problem reported by somebody else, consider adding a -Reported-by: tag to credit the reporter for their contribution. Please -note that this tag should not be added without the reporter's permission, -especially if the problem was not reported in a public forum. That said, -if we diligently credit our bug reporters, they will, hopefully, be -inspired to help us again in the future. +The Reported-by tag gives credit to people who find bugs and report them and it +hopefully inspires them to help us again in the future. Please note that if +the bug was reported in private, then ask for permission first before using the +Reported-by tag. A Tested-by: tag indicates that the patch has been successfully tested (in some environment) by the person named. This tag informs maintainers that diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 344e85cc7323..d7273a5f6456 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -17,7 +17,7 @@ User addresses have bits 63:48 set to 0 while the kernel addresses have the same bits set to 1. TTBRx selection is given by bit 63 of the virtual address. The swapper_pg_dir contains only kernel (global) mappings while the user pgd contains only user (non-global) mappings. -The swapper_pgd_dir address is written to TTBR1 and never written to +The swapper_pg_dir address is written to TTBR1 and never written to TTBR0. diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process index 2e0617936e8f..c24e156a6118 100644 --- a/Documentation/development-process/2.Process +++ b/Documentation/development-process/2.Process @@ -289,10 +289,6 @@ lists when they are assembled; they can be downloaded from: http://www.kernel.org/pub/linux/kernel/next/ -Some information about linux-next has been gathered at: - - http://linux.f-seidel.de/linux-next/pmwiki/ - Linux-next has become an integral part of the kernel development process; all patches merged during a given merge window should really have found their way into linux-next some time before the merge window opens. diff --git a/Documentation/development-process/8.Conclusion b/Documentation/development-process/8.Conclusion index 1990ab4b4949..caef69022e9c 100644 --- a/Documentation/development-process/8.Conclusion +++ b/Documentation/development-process/8.Conclusion @@ -22,10 +22,6 @@ Beyond that, a valuable resource for kernel developers is: http://kernelnewbies.org/ -Information about the linux-next tree gathers at: - - http://linux.f-seidel.de/linux-next/pmwiki/ - And, of course, one should not forget http://kernel.org/, the definitive location for kernel release information. diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards index c554ed3d44fb..556c8665fdbf 100644 --- a/Documentation/devicetree/bindings/arm/arm-boards +++ b/Documentation/devicetree/bindings/arm/arm-boards @@ -92,3 +92,68 @@ Required nodes: - core-module: the root node to the Versatile platforms must have a core-module with regs and the compatible strings "arm,core-module-versatile", "syscon" + +ARM RealView Boards +------------------- +The RealView boards cover tailored evaluation boards that are used to explore +the ARM11 and Cortex A-8 and Cortex A-9 processors. + +Required properties (in root node): + /* RealView Emulation Baseboard */ + compatible = "arm,realview-eb"; + /* RealView Platform Baseboard for ARM1176JZF-S */ + compatible = "arm,realview-pb1176"; + /* RealView Platform Baseboard for ARM11 MPCore */ + compatible = "arm,realview-pb11mp"; + /* RealView Platform Baseboard for Cortex A-8 */ + compatible = "arm,realview-pba8"; + /* RealView Platform Baseboard Explore for Cortex A-9 */ + compatible = "arm,realview-pbx"; + +Required nodes: + +- soc: some node of the RealView platforms must be the SoC + node that contain the SoC-specific devices, withe the compatible + string set to one of these tuples: + "arm,realview-eb-soc", "simple-bus" + "arm,realview-pb1176-soc", "simple-bus" + "arm,realview-pb11mp-soc", "simple-bus" + "arm,realview-pba8-soc", "simple-bus" + "arm,realview-pbx-soc", "simple-bus" + +- syscon: some subnode of the RealView SoC node must be a + system controller node pointing to the control registers, + with the compatible string set to one of these tuples: + "arm,realview-eb-syscon", "syscon" + "arm,realview-pb1176-syscon", "syscon" + "arm,realview-pb11mp-syscon", "syscon" + "arm,realview-pba8-syscon", "syscon" + "arm,realview-pbx-syscon", "syscon" + + Required properties for the system controller: + - regs: the location and size of the system controller registers, + one range of 0x1000 bytes. + +Example: + +/dts-v1/; +#include <dt-bindings/interrupt-controller/irq.h> +#include "skeleton.dtsi" + +/ { + model = "ARM RealView PB1176 with device tree"; + compatible = "arm,realview-pb1176"; + + soc { + #address-cells = <1>; + #size-cells = <1>; + compatible = "arm,realview-pb1176-soc", "simple-bus"; + ranges; + + syscon: syscon@10000000 { + compatible = "arm,realview-syscon", "syscon"; + reg = <0x10000000 0x1000>; + }; + + }; +}; diff --git a/Documentation/devicetree/bindings/arm/bcm/cygnus.txt b/Documentation/devicetree/bindings/arm/bcm/cygnus.txt new file mode 100644 index 000000000000..4c77169bb534 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/bcm/cygnus.txt @@ -0,0 +1,31 @@ +Broadcom Cygnus device tree bindings +------------------------------------ + + +Boards with Cygnus SoCs shall have the following properties: + +Required root node property: + +BCM11300 +compatible = "brcm,bcm11300", "brcm,cygnus"; + +BCM11320 +compatible = "brcm,bcm11320", "brcm,cygnus"; + +BCM11350 +compatible = "brcm,bcm11350", "brcm,cygnus"; + +BCM11360 +compatible = "brcm,bcm11360", "brcm,cygnus"; + +BCM58300 +compatible = "brcm,bcm58300", "brcm,cygnus"; + +BCM58302 +compatible = "brcm,bcm58302", "brcm,cygnus"; + +BCM58303 +compatible = "brcm,bcm58303", "brcm,cygnus"; + +BCM58305 +compatible = "brcm,bcm58305", "brcm,cygnus"; diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt index c7d2fa156678..b38608af66db 100644 --- a/Documentation/devicetree/bindings/arm/gic.txt +++ b/Documentation/devicetree/bindings/arm/gic.txt @@ -17,6 +17,7 @@ Main node required properties: "arm,cortex-a7-gic" "arm,arm11mp-gic" "brcm,brahma-b15-gic" + "arm,arm1176jzf-devchip-gic" - interrupt-controller : Identifies the node as an interrupt controller - #interrupt-cells : Specifies the number of cells needed to encode an interrupt source. The type shall be a <u32> and the value shall be 3. diff --git a/Documentation/devicetree/bindings/arm/marvell,berlin.txt b/Documentation/devicetree/bindings/arm/marvell,berlin.txt index 904de5781f44..a99eb9eb14c0 100644 --- a/Documentation/devicetree/bindings/arm/marvell,berlin.txt +++ b/Documentation/devicetree/bindings/arm/marvell,berlin.txt @@ -106,11 +106,21 @@ Required subnode-properties: - groups: a list of strings describing the group names. - function: a string describing the function used to mux the groups. +* Reset controller binding + +A reset controller is part of the chip control registers set. The chip control +node also provides the reset. The register set is not at the same offset between +Berlin SoCs. + +Required property: +- #reset-cells: must be set to 2 + Example: chip: chip-control@ea0000 { compatible = "marvell,berlin2-chip-ctrl"; #clock-cells = <1>; + #reset-cells = <2>; reg = <0xea0000 0x400>; clocks = <&refclk>, <&externaldev 0>; clock-names = "refclk", "video_ext0"; diff --git a/Documentation/devicetree/bindings/arm/ste-nomadik.txt b/Documentation/devicetree/bindings/arm/ste-nomadik.txt index 6256ec31666d..2fdff5a806cf 100644 --- a/Documentation/devicetree/bindings/arm/ste-nomadik.txt +++ b/Documentation/devicetree/bindings/arm/ste-nomadik.txt @@ -10,6 +10,12 @@ Required root node property: src Boards with the Nomadik SoC include: +Nomadik NHK-15 board manufactured by ST Microelectronics: + +Required root node property: + +compatible="st,nomadik-nhk-15"; + S8815 "MiniKit" manufactured by Calao Systems: Required root node property: diff --git a/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt b/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt new file mode 100644 index 000000000000..00d26edec8bc --- /dev/null +++ b/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt @@ -0,0 +1,34 @@ +Broadcom Cygnus Clocks + +This binding uses the common clock binding: +Documentation/devicetree/bindings/clock/clock-bindings.txt + +Currently various "fixed" clocks are declared for peripheral drivers that use +the common clock framework to reference their core clocks. Proper support of +these clocks will be added later + +Device tree example: + + clocks { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + osc: oscillator { + compatible = "fixed-clock"; + #clock-cells = <1>; + clock-frequency = <25000000>; + }; + + apb_clk: apb_clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000000>; + }; + + periph_clk: periph_clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <500000000>; + }; + }; diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index fbde415078e6..605dcca5dbec 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -56,6 +56,8 @@ gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire In infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz) infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz) isl,isl12057 Intersil ISL12057 I2C RTC Chip +isil,isl29028 (deprecated, use isl) +isl,isl29028 Intersil ISL29028 Ambient Light and Proximity Sensor maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator maxim,max1237 Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs maxim,max6625 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt new file mode 100644 index 000000000000..1a2cd3d266db --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt @@ -0,0 +1,38 @@ +* Generic Mailbox Controller and client driver bindings + +Generic binding to provide a way for Mailbox controller drivers to +assign appropriate mailbox channel to client drivers. + +* Mailbox Controller + +Required property: +- #mbox-cells: Must be at least 1. Number of cells in a mailbox + specifier. + +Example: + mailbox: mailbox { + ... + #mbox-cells = <1>; + }; + + +* Mailbox Client + +Required property: +- mboxes: List of phandle and mailbox channel specifiers. + +Optional property: +- mbox-names: List of identifier strings for each mailbox channel + required by the client. The use of this property + is discouraged in favor of using index in list of + 'mboxes' while requesting a mailbox. Instead the + platforms may define channel indices, in DT headers, + to something legible. + +Example: + pwr_cntrl: power { + ... + mbox-names = "pwr-ctrl", "rpc"; + mboxes = <&mailbox 0 + &mailbox 1>; + }; diff --git a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt index 0f8487b88822..e77e167593db 100644 --- a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt +++ b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt @@ -11,3 +11,5 @@ Optional properties: are supported on the device. Valid value for SMSC LAN91c111 are 1, 2 or 4. If it's omitted or invalid, the size would be 2 meaning 16-bit access only. +- power-gpios: GPIO to control the PWRDWN pin +- reset-gpios: GPIO to control the RESET pin diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt index 0bda229a6171..3899d6a557c1 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt @@ -1,5 +1,20 @@ Freescale FlexTimer Module (FTM) PWM controller +The same FTM PWM device can have a different endianness on different SoCs. The +device tree provides a property to describing this so that an operating system +device driver can handle all variants of the device. Refer to the table below +for the endianness of the FTM PWM block as integrated into the existing SoCs: + + SoC | FTM-PWM endianness + --------+------------------- + Vybrid | LE + LS1 | BE + LS2 | LE + +Please see ../regmap/regmap.txt for more detail about how to specify endian +modes in device tree. + + Required properties: - compatible: Should be "fsl,vf610-ftm-pwm". - reg: Physical base address and length of the controller's registers @@ -16,7 +31,8 @@ Required properties: - pinctrl-names: Must contain a "default" entry. - pinctrl-NNN: One property must exist for each entry in pinctrl-names. See pinctrl/pinctrl-bindings.txt for details of the property values. - +- big-endian: Boolean property, required if the FTM PWM registers use a big- + endian rather than little-endian layout. Example: @@ -32,4 +48,5 @@ pwm0: pwm@40038000 { <&clks VF610_CLK_FTM0_EXT_FIX_EN>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pwm0_1>; + big-endian; }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt index d47d15a6a298..b8be3d09ee26 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt @@ -7,8 +7,8 @@ Required properties: "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC - reg: physical base address and length of the controller's registers - clocks: phandle and clock specifier of the PWM reference clock - - #pwm-cells: should be 2. See pwm.txt in this directory for a - description of the cell format. + - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory + for a description of the cell format. Example: diff --git a/Documentation/devicetree/bindings/sound/sgtl5000.txt b/Documentation/devicetree/bindings/sound/sgtl5000.txt index 955df60a118c..d556dcb8816b 100644 --- a/Documentation/devicetree/bindings/sound/sgtl5000.txt +++ b/Documentation/devicetree/bindings/sound/sgtl5000.txt @@ -7,10 +7,20 @@ Required properties: - clocks : the clock provider of SYS_MCLK +- VDDA-supply : the regulator provider of VDDA + +- VDDIO-supply: the regulator provider of VDDIO + +Optional properties: + +- VDDD-supply : the regulator provider of VDDD + Example: codec: sgtl5000@0a { compatible = "fsl,sgtl5000"; reg = <0x0a>; clocks = <&clks 150>; + VDDA-supply = <®_3p3v>; + VDDIO-supply = <®_3p3v>; }; diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt index 042a0273b8ba..b7ba01ad1426 100644 --- a/Documentation/devicetree/bindings/submitting-patches.txt +++ b/Documentation/devicetree/bindings/submitting-patches.txt @@ -12,6 +12,9 @@ I. For patch submitters devicetree@vger.kernel.org + 3) The Documentation/ portion of the patch should come in the series before + the code implementing the binding. + II. For kernel maintainers 1) If you aren't comfortable reviewing a given binding, reply to it and ask diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt index 1f0f67234a91..3c67bd50aa10 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt @@ -1,7 +1,10 @@ * Temperature Monitor (TEMPMON) on Freescale i.MX SoCs Required properties: -- compatible : "fsl,imx6q-thermal" +- compatible : "fsl,imx6q-tempmon" for i.MX6Q, "fsl,imx6sx-tempmon" for i.MX6SX. + i.MX6SX has two more IRQs than i.MX6Q, one is IRQ_LOW and the other is IRQ_PANIC, + when temperature is below than low threshold, IRQ_LOW will be triggered, when temperature + is higher than panic threshold, system will auto reboot by SRC module. - fsl,tempmon : phandle pointer to system controller that contains TEMPMON control registers, e.g. ANATOP on imx6q. - fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON diff --git a/Documentation/devicetree/bindings/timer/renesas,mtu2.txt b/Documentation/devicetree/bindings/timer/renesas,mtu2.txt index d9a8d5af1a21..ba0a34d97eb8 100644 --- a/Documentation/devicetree/bindings/timer/renesas,mtu2.txt +++ b/Documentation/devicetree/bindings/timer/renesas,mtu2.txt @@ -1,4 +1,4 @@ -* Renesas R-Car Multi-Function Timer Pulse Unit 2 (MTU2) +* Renesas Multi-Function Timer Pulse Unit 2 (MTU2) The MTU2 is a multi-purpose, multi-channel timer/counter with configurable clock inputs and programmable compare match. diff --git a/Documentation/devicetree/bindings/timer/renesas,tmu.txt b/Documentation/devicetree/bindings/timer/renesas,tmu.txt index 7db89fb25444..cd5f20bf2582 100644 --- a/Documentation/devicetree/bindings/timer/renesas,tmu.txt +++ b/Documentation/devicetree/bindings/timer/renesas,tmu.txt @@ -1,4 +1,4 @@ -* Renesas R-Car Timer Unit (TMU) +* Renesas R-Mobile/R-Car Timer Unit (TMU) The TMU is a 32-bit timer/counter with configurable clock inputs and programmable compare match. @@ -9,6 +9,8 @@ are independent. The TMU hardware supports up to three channels. Required Properties: - compatible: must contain one or more of the following: + - "renesas,tmu-r8a7740" for the r8a7740 TMU + - "renesas,tmu-r8a7778" for the r8a7778 TMU - "renesas,tmu-r8a7779" for the r8a7779 TMU - "renesas,tmu" for any TMU. This is a fallback for the above renesas,tmu-* entries diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 723999d73744..bfbd93e06bae 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -77,6 +77,7 @@ innolux Innolux Corporation intel Intel Corporation intercontrol Inter Control Group isee ISEE 2007 S.L. +isil Intersil (deprecated, use isl) isl Intersil karo Ka-Ro electronics GmbH keymile Keymile GmbH diff --git a/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt new file mode 100644 index 000000000000..c3a36ee45552 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt @@ -0,0 +1,24 @@ +Zynq Watchdog Device Tree Bindings +------------------------------------------- + +Required properties: +- compatible : Should be "cdns,wdt-r1p2". +- clocks : This is pclk (APB clock). +- interrupts : This is wd_irq - watchdog timeout interrupt. +- interrupt-parent : Must be core interrupt controller. + +Optional properties +- reset-on-timeout : If this property exists, then a reset is done + when watchdog times out. +- timeout-sec : Watchdog timeout value (in seconds). + +Example: + watchdog@f8005000 { + compatible = "cdns,wdt-r1p2"; + clocks = <&clkc 45>; + interrupt-parent = <&intc>; + interrupts = <0 9 1>; + reg = <0xf8005000 0x1000>; + reset-on-timeout; + timeout-sec = <10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt index e52ba2da868c..8dab6fd024aa 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt @@ -7,7 +7,8 @@ Required properties: Optional property: - big-endian: If present the watchdog device's registers are implemented - in big endian mode, otherwise in little mode. + in big endian mode, otherwise in native mode(same with CPU), for more + detail please see: Documentation/devicetree/bindings/regmap/regmap.txt. Examples: diff --git a/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt new file mode 100644 index 000000000000..9200fc2d508c --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt @@ -0,0 +1,13 @@ +Meson SoCs Watchdog timer + +Required properties: + +- compatible : should be "amlogic,meson6-wdt" +- reg : Specifies base physical address and size of the registers. + +Example: + +wdt: watchdog@c1109900 { + compatible = "amlogic,meson6-wdt"; + reg = <0xc1109900 0x8>; +}; diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt new file mode 100644 index 000000000000..4726924d034e --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt @@ -0,0 +1,24 @@ +Qualcomm Krait Processor Sub-system (KPSS) Watchdog +--------------------------------------------------- + +Required properties : +- compatible : shall contain only one of the following: + + "qcom,kpss-wdt-msm8960" + "qcom,kpss-wdt-apq8064" + "qcom,kpss-wdt-ipq8064" + +- reg : shall contain base register location and length +- clocks : shall contain the input clock + +Optional properties : +- timeout-sec : shall contain the default watchdog timeout in seconds, + if unset, the default timeout is 30 seconds + +Example: + watchdog@208a038 { + compatible = "qcom,kpss-wdt-ipq8064"; + reg = <0x0208a038 0x40>; + clocks = <&sleep_clk>; + timeout-sec = <10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt index cfff37511aac..8f3d96af81d7 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt @@ -9,6 +9,7 @@ Required properties: (a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs (b) "samsung,exynos5250-wdt" for Exynos5250 (c) "samsung,exynos5420-wdt" for Exynos5420 + (c) "samsung,exynos7-wdt" for Exynos7 - reg : base physical address of the controller and length of memory mapped region. diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 94d93b1f8b53..b30753cbf431 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -67,6 +67,7 @@ prototypes: struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); locking rules: all may block @@ -96,6 +97,7 @@ fiemap: no update_time: no atomic_open: yes tmpfile: no +dentry_open: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt new file mode 100644 index 000000000000..530850a72735 --- /dev/null +++ b/Documentation/filesystems/overlayfs.txt @@ -0,0 +1,198 @@ +Written by: Neil Brown <neilb@suse.de> + +Overlay Filesystem +================== + +This document describes a prototype for a new approach to providing +overlay-filesystem functionality in Linux (sometimes referred to as +union-filesystems). An overlay-filesystem tries to present a +filesystem which is the result over overlaying one filesystem on top +of the other. + +The result will inevitably fail to look exactly like a normal +filesystem for various technical reasons. The expectation is that +many use cases will be able to ignore these differences. + +This approach is 'hybrid' because the objects that appear in the +filesystem do not all appear to belong to that filesystem. In many +cases an object accessed in the union will be indistinguishable +from accessing the corresponding object from the original filesystem. +This is most obvious from the 'st_dev' field returned by stat(2). + +While directories will report an st_dev from the overlay-filesystem, +all non-directory objects will report an st_dev from the lower or +upper filesystem that is providing the object. Similarly st_ino will +only be unique when combined with st_dev, and both of these can change +over the lifetime of a non-directory object. Many applications and +tools ignore these values and will not be affected. + +Upper and Lower +--------------- + +An overlay filesystem combines two filesystems - an 'upper' filesystem +and a 'lower' filesystem. When a name exists in both filesystems, the +object in the 'upper' filesystem is visible while the object in the +'lower' filesystem is either hidden or, in the case of directories, +merged with the 'upper' object. + +It would be more correct to refer to an upper and lower 'directory +tree' rather than 'filesystem' as it is quite possible for both +directory trees to be in the same filesystem and there is no +requirement that the root of a filesystem be given for either upper or +lower. + +The lower filesystem can be any filesystem supported by Linux and does +not need to be writable. The lower filesystem can even be another +overlayfs. The upper filesystem will normally be writable and if it +is it must support the creation of trusted.* extended attributes, and +must provide valid d_type in readdir responses, so NFS is not suitable. + +A read-only overlay of two read-only filesystems may use any +filesystem type. + +Directories +----------- + +Overlaying mainly involves directories. If a given name appears in both +upper and lower filesystems and refers to a non-directory in either, +then the lower object is hidden - the name refers only to the upper +object. + +Where both upper and lower objects are directories, a merged directory +is formed. + +At mount time, the two directories given as mount options "lowerdir" and +"upperdir" are combined into a merged directory: + + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\ +workdir=/work /merged + +The "workdir" needs to be an empty directory on the same filesystem +as upperdir. + +Then whenever a lookup is requested in such a merged directory, the +lookup is performed in each actual directory and the combined result +is cached in the dentry belonging to the overlay filesystem. If both +actual lookups find directories, both are stored and a merged +directory is created, otherwise only one is stored: the upper if it +exists, else the lower. + +Only the lists of names from directories are merged. Other content +such as metadata and extended attributes are reported for the upper +directory only. These attributes of the lower directory are hidden. + +whiteouts and opaque directories +-------------------------------- + +In order to support rm and rmdir without changing the lower +filesystem, an overlay filesystem needs to record in the upper filesystem +that files have been removed. This is done using whiteouts and opaque +directories (non-directories are always opaque). + +A whiteout is created as a character device with 0/0 device number. +When a whiteout is found in the upper level of a merged directory, any +matching name in the lower level is ignored, and the whiteout itself +is also hidden. + +A directory is made opaque by setting the xattr "trusted.overlay.opaque" +to "y". Where the upper filesystem contains an opaque directory, any +directory in the lower filesystem with the same name is ignored. + +readdir +------- + +When a 'readdir' request is made on a merged directory, the upper and +lower directories are each read and the name lists merged in the +obvious way (upper is read first, then lower - entries that already +exist are not re-added). This merged name list is cached in the +'struct file' and so remains as long as the file is kept open. If the +directory is opened and read by two processes at the same time, they +will each have separate caches. A seekdir to the start of the +directory (offset 0) followed by a readdir will cause the cache to be +discarded and rebuilt. + +This means that changes to the merged directory do not appear while a +directory is being read. This is unlikely to be noticed by many +programs. + +seek offsets are assigned sequentially when the directories are read. +Thus if + - read part of a directory + - remember an offset, and close the directory + - re-open the directory some time later + - seek to the remembered offset + +there may be little correlation between the old and new locations in +the list of filenames, particularly if anything has changed in the +directory. + +Readdir on directories that are not merged is simply handled by the +underlying directory (upper or lower). + + +Non-directories +--------------- + +Objects that are not directories (files, symlinks, device-special +files etc.) are presented either from the upper or lower filesystem as +appropriate. When a file in the lower filesystem is accessed in a way +the requires write-access, such as opening for write access, changing +some metadata etc., the file is first copied from the lower filesystem +to the upper filesystem (copy_up). Note that creating a hard-link +also requires copy_up, though of course creation of a symlink does +not. + +The copy_up may turn out to be unnecessary, for example if the file is +opened for read-write but the data is not modified. + +The copy_up process first makes sure that the containing directory +exists in the upper filesystem - creating it and any parents as +necessary. It then creates the object with the same metadata (owner, +mode, mtime, symlink-target etc.) and then if the object is a file, the +data is copied from the lower to the upper filesystem. Finally any +extended attributes are copied up. + +Once the copy_up is complete, the overlay filesystem simply +provides direct access to the newly created file in the upper +filesystem - future operations on the file are barely noticed by the +overlay filesystem (though an operation on the name of the file such as +rename or unlink will of course be noticed and handled). + + +Non-standard behavior +--------------------- + +The copy_up operation essentially creates a new, identical file and +moves it over to the old name. The new file may be on a different +filesystem, so both st_dev and st_ino of the file may change. + +Any open files referring to this inode will access the old data and +metadata. Similarly any file locks obtained before copy_up will not +apply to the copied up file. + +On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and +fsetxattr(2) will fail with EROFS. + +If a file with multiple hard links is copied up, then this will +"break" the link. Changes will not be propagated to other names +referring to the same inode. + +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory +object in overlayfs will not contain valid absolute paths, only +relative paths leading up to the filesystem's root. This will be +fixed in the future. + +Some operations are not atomic, for example a crash during copy_up or +rename will leave the filesystem in an inconsistent state. This will +be addressed in the future. + +Changes to underlying filesystems +--------------------------------- + +Offline changes, when the overlay is not mounted, are allowed to either +the upper or the lower trees. + +Changes to the underlying filesystems while part of a mounted overlay +filesystem are not allowed. If the underlying filesystem is changed, +the behavior of the overlay is undefined, though it will not result in +a crash or deadlock. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fceff7c00a3c..20bf204426ca 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,7 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); }; Again, all methods are called without any locks being held, unless @@ -696,6 +697,12 @@ struct address_space_operations { but instead uses bmap to find out where the blocks in the file are and uses those addresses directly. + dentry_open: *WARNING: probably going away soon, do not use!* This is an + alternative to f_op->open(), the difference is that this method may open + a file not necessarily originating from the same filesystem as the one + i_op->open() was called on. It may be useful for stacking filesystems + which want to allow native I/O directly on underlying files. + invalidatepage: If a page has PagePrivate set, then invalidatepage will be called when part or all of the page is to be removed diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7dbe5ec9d9cd..4c81a860cc2b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1015,10 +1015,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: {"off" | "on" | "skip[mbr]"} efi= [EFI] - Format: { "old_map" } + Format: { "old_map", "nochunk", "noruntime" } old_map [X86-64]: switch to the old ioremap-based EFI runtime services mapping. 32-bit still uses this one by default. + nochunk: disable reading files in "chunks" in the EFI + boot stub, as chunking can cause problems with some + firmware implementations. + noruntime : disable EFI runtime services support efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of @@ -1260,7 +1264,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. i8042.noloop [HW] Disable the AUX Loopback command while probing for the AUX port i8042.nomux [HW] Don't check presence of an active multiplexing - controller. Default: true. + controller i8042.nopnp [HW] Don't use ACPIPnP / PnPBIOS to discover KBD/AUX controllers i8042.notimeout [HW] Ignore timeout condition signalled by controller @@ -1303,6 +1307,18 @@ bytes respectively. Such letter suffixes can also be entirely omitted. .cdrom .chs .ignore_cable are additional options See Documentation/ide/ide.txt. + ide-generic.probe-mask= [HW] (E)IDE subsystem + Format: <int> + Probe mask for legacy ISA IDE ports. Depending on + platform up to 6 ports are supported, enabled by + setting corresponding bits in the mask to 1. The + default value is 0x0, which has a special meaning. + On systems that have PCI, it triggers scanning the + PCI bus for the first and the second port, which + are then probed. On systems without PCI the value + of 0x0 enables probing the two first ports as if it + was 0x3. + ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem Claim all unknown PCI IDE storage controllers. @@ -1583,6 +1599,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. kmemleak= [KNL] Boot-time kmemleak enable/disable Valid arguments: on, off Default: on + Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, + the default is off. kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode Valid arguments: 0, 1, 2 @@ -2232,7 +2250,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nodsp [SH] Disable hardware DSP at boot time. - noefi [X86] Disable EFI runtime services support. + noefi Disable EFI runtime services support. noexec [IA-64] @@ -3465,6 +3483,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. e.g. base its process migration decisions on it. Default is on. + topology_updates= [KNL, PPC, NUMA] + Format: {off} + Specify if the kernel should ignore (off) + topology updates sent by the hypervisor to this + LPAR. + tp720= [HW,PS2] tpm_suspend_pcr=[HW,TPM] diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index f4f033c8d856..45e777f4e41d 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -62,6 +62,10 @@ Memory may be allocated or freed before kmemleak is initialised and these actions are stored in an early log buffer. The size of this buffer is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option. +If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is +disabled by default. Passing "kmemleak=on" on the kernel command +line enables the function. + Basic Algorithm --------------- diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt new file mode 100644 index 000000000000..60f43ff629aa --- /dev/null +++ b/Documentation/mailbox.txt @@ -0,0 +1,122 @@ + The Common Mailbox Framework + Jassi Brar <jaswinder.singh@linaro.org> + + This document aims to help developers write client and controller +drivers for the API. But before we start, let us note that the +client (especially) and controller drivers are likely going to be +very platform specific because the remote firmware is likely to be +proprietary and implement non-standard protocol. So even if two +platforms employ, say, PL320 controller, the client drivers can't +be shared across them. Even the PL320 driver might need to accommodate +some platform specific quirks. So the API is meant mainly to avoid +similar copies of code written for each platform. Having said that, +nothing prevents the remote f/w to also be Linux based and use the +same api there. However none of that helps us locally because we only +ever deal at client's protocol level. + Some of the choices made during implementation are the result of this +peculiarity of this "common" framework. + + + + Part 1 - Controller Driver (See include/linux/mailbox_controller.h) + + Allocate mbox_controller and the array of mbox_chan. +Populate mbox_chan_ops, except peek_data() all are mandatory. +The controller driver might know a message has been consumed +by the remote by getting an IRQ or polling some hardware flag +or it can never know (the client knows by way of the protocol). +The method in order of preference is IRQ -> Poll -> None, which +the controller driver should set via 'txdone_irq' or 'txdone_poll' +or neither. + + + Part 2 - Client Driver (See include/linux/mailbox_client.h) + + The client might want to operate in blocking mode (synchronously +send a message through before returning) or non-blocking/async mode (submit +a message and a callback function to the API and return immediately). + + +struct demo_client { + struct mbox_client cl; + struct mbox_chan *mbox; + struct completion c; + bool async; + /* ... */ +}; + +/* + * This is the handler for data received from remote. The behaviour is purely + * dependent upon the protocol. This is just an example. + */ +static void message_from_remote(struct mbox_client *cl, void *mssg) +{ + struct demo_client *dc = container_of(mbox_client, + struct demo_client, cl); + if (dc->aysnc) { + if (is_an_ack(mssg)) { + /* An ACK to our last sample sent */ + return; /* Or do something else here */ + } else { /* A new message from remote */ + queue_req(mssg); + } + } else { + /* Remote f/w sends only ACK packets on this channel */ + return; + } +} + +static void sample_sent(struct mbox_client *cl, void *mssg, int r) +{ + struct demo_client *dc = container_of(mbox_client, + struct demo_client, cl); + complete(&dc->c); +} + +static void client_demo(struct platform_device *pdev) +{ + struct demo_client *dc_sync, *dc_async; + /* The controller already knows async_pkt and sync_pkt */ + struct async_pkt ap; + struct sync_pkt sp; + + dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL); + dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL); + + /* Populate non-blocking mode client */ + dc_async->cl.dev = &pdev->dev; + dc_async->cl.rx_callback = message_from_remote; + dc_async->cl.tx_done = sample_sent; + dc_async->cl.tx_block = false; + dc_async->cl.tx_tout = 0; /* doesn't matter here */ + dc_async->cl.knows_txdone = false; /* depending upon protocol */ + dc_async->async = true; + init_completion(&dc_async->c); + + /* Populate blocking mode client */ + dc_sync->cl.dev = &pdev->dev; + dc_sync->cl.rx_callback = message_from_remote; + dc_sync->cl.tx_done = NULL; /* operate in blocking mode */ + dc_sync->cl.tx_block = true; + dc_sync->cl.tx_tout = 500; /* by half a second */ + dc_sync->cl.knows_txdone = false; /* depending upon protocol */ + dc_sync->async = false; + + /* ASync mailbox is listed second in 'mboxes' property */ + dc_async->mbox = mbox_request_channel(&dc_async->cl, 1); + /* Populate data packet */ + /* ap.xxx = 123; etc */ + /* Send async message to remote */ + mbox_send_message(dc_async->mbox, &ap); + + /* Sync mailbox is listed first in 'mboxes' property */ + dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0); + /* Populate data packet */ + /* sp.abc = 123; etc */ + /* Send message to remote in blocking mode */ + mbox_send_message(dc_sync->mbox, &sp); + /* At this point 'sp' has been sent */ + + /* Now wait for async chan to be done */ + wait_for_completion(&dc_async->c); +} diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index a5da5c7e7128..129f7c0e1483 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -5,7 +5,8 @@ performance expectations by drivers, subsystems and user space applications on one of the parameters. Two different PM QoS frameworks are available: -1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput. +1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput, +memory_bandwidth. 2. the per-device PM QoS framework provides the API to manage the per-device latency constraints and PM QoS flags. @@ -13,6 +14,7 @@ Each parameters have defined units: * latency: usec * timeout: usec * throughput: kbs (kilo bit / sec) + * memory bandwidth: mbs (mega bit / sec) 1. PM QoS framework diff --git a/Documentation/prctl/Makefile b/Documentation/prctl/Makefile index 3e3232dcb2b8..2948b7b124b9 100644 --- a/Documentation/prctl/Makefile +++ b/Documentation/prctl/Makefile @@ -1,5 +1,5 @@ # List of programs to build -hostprogs-y := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test +hostprogs-$(CONFIG_X86) := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test # Tell kbuild to always build the programs always := $(hostprogs-y) diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt index da162f7fd5f5..5a9879bad073 100644 --- a/Documentation/scsi/osd.txt +++ b/Documentation/scsi/osd.txt @@ -184,8 +184,7 @@ Any problems, questions, bug reports, lonely OSD nights, please email: More up-to-date information can be found on: http://open-osd.org -Boaz Harrosh <bharrosh@panasas.com> -Benny Halevy <bhalevy@panasas.com> +Boaz Harrosh <ooo@electrozaur.com> References ========== diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt new file mode 100644 index 000000000000..5518465290bf --- /dev/null +++ b/Documentation/target/tcmu-design.txt @@ -0,0 +1,378 @@ +Contents: + +1) TCM Userspace Design + a) Background + b) Benefits + c) Design constraints + d) Implementation overview + i. Mailbox + ii. Command ring + iii. Data Area + e) Device discovery + f) Device events + g) Other contingencies +2) Writing a user pass-through handler + a) Discovering and configuring TCMU uio devices + b) Waiting for events on the device(s) + c) Managing the command ring +3) Command filtering and pass_level +4) A final note + + +TCM Userspace Design +-------------------- + +TCM is another name for LIO, an in-kernel iSCSI target (server). +Existing TCM targets run in the kernel. TCMU (TCM in Userspace) +allows userspace programs to be written which act as iSCSI targets. +This document describes the design. + +The existing kernel provides modules for different SCSI transport +protocols. TCM also modularizes the data storage. There are existing +modules for file, block device, RAM or using another SCSI device as +storage. These are called "backstores" or "storage engines". These +built-in modules are implemented entirely as kernel code. + +Background: + +In addition to modularizing the transport protocol used for carrying +SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes +the actual data storage as well. These are referred to as "backstores" +or "storage engines". The target comes with backstores that allow a +file, a block device, RAM, or another SCSI device to be used for the +local storage needed for the exported SCSI LUN. Like the rest of LIO, +these are implemented entirely as kernel code. + +These backstores cover the most common use cases, but not all. One new +use case that other non-kernel target solutions, such as tgt, are able +to support is using Gluster's GLFS or Ceph's RBD as a backstore. The +target then serves as a translator, allowing initiators to store data +in these non-traditional networked storage systems, while still only +using standard protocols themselves. + +If the target is a userspace process, supporting these is easy. tgt, +for example, needs only a small adapter module for each, because the +modules just use the available userspace libraries for RBD and GLFS. + +Adding support for these backstores in LIO is considerably more +difficult, because LIO is entirely kernel code. Instead of undertaking +the significant work to port the GLFS or RBD APIs and protocols to the +kernel, another approach is to create a userspace pass-through +backstore for LIO, "TCMU". + + +Benefits: + +In addition to allowing relatively easy support for RBD and GLFS, TCMU +will also allow easier development of new backstores. TCMU combines +with the LIO loopback fabric to become something similar to FUSE +(Filesystem in Userspace), but at the SCSI layer instead of the +filesystem layer. A SUSE, if you will. + +The disadvantage is there are more distinct components to configure, and +potentially to malfunction. This is unavoidable, but hopefully not +fatal if we're careful to keep things as simple as possible. + +Design constraints: + +- Good performance: high throughput, low latency +- Cleanly handle if userspace: + 1) never attaches + 2) hangs + 3) dies + 4) misbehaves +- Allow future flexibility in user & kernel implementations +- Be reasonably memory-efficient +- Simple to configure & run +- Simple to write a userspace backend + + +Implementation overview: + +The core of the TCMU interface is a memory region that is shared +between kernel and userspace. Within this region is: a control area +(mailbox); a lockless producer/consumer circular buffer for commands +to be passed up, and status returned; and an in/out data buffer area. + +TCMU uses the pre-existing UIO subsystem. UIO allows device driver +development in userspace, and this is conceptually very close to the +TCMU use case, except instead of a physical device, TCMU implements a +memory-mapped layout designed for SCSI commands. Using UIO also +benefits TCMU by handling device introspection (e.g. a way for +userspace to determine how large the shared region is) and signaling +mechanisms in both directions. + +There are no embedded pointers in the memory region. Everything is +expressed as an offset from the region's starting address. This allows +the ring to still work if the user process dies and is restarted with +the region mapped at a different virtual address. + +See target_core_user.h for the struct definitions. + +The Mailbox: + +The mailbox is always at the start of the shared memory region, and +contains a version, details about the starting offset and size of the +command ring, and head and tail pointers to be used by the kernel and +userspace (respectively) to put commands on the ring, and indicate +when the commands are completed. + +version - 1 (userspace should abort if otherwise) +flags - none yet defined. +cmdr_off - The offset of the start of the command ring from the start +of the memory region, to account for the mailbox size. +cmdr_size - The size of the command ring. This does *not* need to be a +power of two. +cmd_head - Modified by the kernel to indicate when a command has been +placed on the ring. +cmd_tail - Modified by userspace to indicate when it has completed +processing of a command. + +The Command Ring: + +Commands are placed on the ring by the kernel incrementing +mailbox.cmd_head by the size of the command, modulo cmdr_size, and +then signaling userspace via uio_event_notify(). Once the command is +completed, userspace updates mailbox.cmd_tail in the same way and +signals the kernel via a 4-byte write(). When cmd_head equals +cmd_tail, the ring is empty -- no commands are currently waiting to be +processed by userspace. + +TCMU commands start with a common header containing "len_op", a 32-bit +value that stores the length, as well as the opcode in the lowest +unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and +TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it +should skip ahead by the bytes in "length". (The kernel inserts PAD +entries to ensure each CMD entry fits contigously into the circular +buffer.) + +When userspace handles a CMD, it finds the SCSI CDB (Command Data +Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the +start of the overall shared memory region, not the entry. The data +in/out buffers are accessible via tht req.iov[] array. Note that +each iov.iov_base is also an offset from the start of the region. + +TCMU currently does not support BIDI operations. + +When completing a command, userspace sets rsp.scsi_status, and +rsp.sense_buffer if necessary. Userspace then increments +mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the +kernel via the UIO method, a 4-byte write to the file descriptor. + +The Data Area: + +This is shared-memory space after the command ring. The organization +of this area is not defined in the TCMU interface, and userspace +should access only the parts referenced by pending iovs. + + +Device Discovery: + +Other devices may be using UIO besides TCMU. Unrelated user processes +may also be handling different sets of TCMU devices. TCMU userspace +processes must find their devices by scanning sysfs +class/uio/uio*/name. For TCMU devices, these names will be of the +format: + +tcm-user/<hba_num>/<device_name>/<subtype>/<path> + +where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num> +and <device_name> allow userspace to find the device's path in the +kernel target's configfs tree. Assuming the usual mount point, it is +found at: + +/sys/kernel/config/target/core/user_<hba_num>/<device_name> + +This location contains attributes such as "hw_block_size", that +userspace needs to know for correct operation. + +<subtype> will be a userspace-process-unique string to identify the +TCMU device as expecting to be backed by a certain handler, and <path> +will be an additional handler-specific string for the user process to +configure the device, if needed. The name cannot contain ':', due to +LIO limitations. + +For all devices so discovered, the user handler opens /dev/uioX and +calls mmap(): + +mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0) + +where size must be equal to the value read from +/sys/class/uio/uioX/maps/map0/size. + + +Device Events: + +If a new device is added or removed, a notification will be broadcast +over netlink, using a generic netlink family name of "TCM-USER" and a +multicast group named "config". This will include the UIO name as +described in the previous section, as well as the UIO minor +number. This should allow userspace to identify both the UIO device and +the LIO device, so that after determining the device is supported +(based on subtype) it can take the appropriate action. + + +Other contingencies: + +Userspace handler process never attaches: + +- TCMU will post commands, and then abort them after a timeout period + (30 seconds.) + +Userspace handler process is killed: + +- It is still possible to restart and re-connect to TCMU + devices. Command ring is preserved. However, after the timeout period, + the kernel will abort pending tasks. + +Userspace handler process hangs: + +- The kernel will abort pending tasks after a timeout period. + +Userspace handler process is malicious: + +- The process can trivially break the handling of devices it controls, + but should not be able to access kernel memory outside its shared + memory areas. + + +Writing a user pass-through handler (with example code) +------------------------------------------------------- + +A user process handing a TCMU device must support the following: + +a) Discovering and configuring TCMU uio devices +b) Waiting for events on the device(s) +c) Managing the command ring: Parsing operations and commands, + performing work as needed, setting response fields (scsi_status and + possibly sense_buffer), updating cmd_tail, and notifying the kernel + that work has been finished + +First, consider instead writing a plugin for tcmu-runner. tcmu-runner +implements all of this, and provides a higher-level API for plugin +authors. + +TCMU is designed so that multiple unrelated processes can manage TCMU +devices separately. All handlers should make sure to only open their +devices, based opon a known subtype string. + +a) Discovering and configuring TCMU UIO devices: + +(error checking omitted for brevity) + +int fd, dev_fd; +char buf[256]; +unsigned long long map_len; +void *map; + +fd = open("/sys/class/uio/uio0/name", O_RDONLY); +ret = read(fd, buf, sizeof(buf)); +close(fd); +buf[ret-1] = '\0'; /* null-terminate and chop off the \n */ + +/* we only want uio devices whose name is a format we expect */ +if (strncmp(buf, "tcm-user", 8)) + exit(-1); + +/* Further checking for subtype also needed here */ + +fd = open(/sys/class/uio/%s/maps/map0/size, O_RDONLY); +ret = read(fd, buf, sizeof(buf)); +close(fd); +str_buf[ret-1] = '\0'; /* null-terminate and chop off the \n */ + +map_len = strtoull(buf, NULL, 0); + +dev_fd = open("/dev/uio0", O_RDWR); +map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0); + + +b) Waiting for events on the device(s) + +while (1) { + char buf[4]; + + int ret = read(dev_fd, buf, 4); /* will block */ + + handle_device_events(dev_fd, map); +} + + +c) Managing the command ring + +#include <linux/target_core_user.h> + +int handle_device_events(int fd, void *map) +{ + struct tcmu_mailbox *mb = map; + struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail; + int did_some_work = 0; + + /* Process events from cmd ring until we catch up with cmd_head */ + while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) { + + if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) { + uint8_t *cdb = (void *)mb + ent->req.cdb_off; + bool success = true; + + /* Handle command here. */ + printf("SCSI opcode: 0x%x\n", cdb[0]); + + /* Set response fields */ + if (success) + ent->rsp.scsi_status = SCSI_NO_SENSE; + else { + /* Also fill in rsp->sense_buffer here */ + ent->rsp.scsi_status = SCSI_CHECK_CONDITION; + } + } + else { + /* Do nothing for PAD entries */ + } + + /* update cmd_tail */ + mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size; + ent = (void *) mb + mb->cmdr_off + mb->cmd_tail; + did_some_work = 1; + } + + /* Notify the kernel that work has been finished */ + if (did_some_work) { + uint32_t buf = 0; + + write(fd, &buf, 4); + } + + return 0; +} + + +Command filtering and pass_level +-------------------------------- + +TCMU supports a "pass_level" option with valid values of 0 or 1. When +the value is 0 (the default), nearly all SCSI commands received for +the device are passed through to the handler. This allows maximum +flexibility but increases the amount of code required by the handler, +to support all mandatory SCSI commands. If pass_level is set to 1, +then only IO-related commands are presented, and the rest are handled +by LIO's in-kernel command emulation. The commands presented at level +1 include all versions of: + +READ +WRITE +WRITE_VERIFY +XDWRITEREAD +WRITE_SAME +COMPARE_AND_WRITE +SYNCHRONIZE_CACHE +UNMAP + + +A final note +------------ + +Please be careful to return codes as defined by the SCSI +specifications. These are different than some values defined in the +scsi/scsi.h include file. For example, CHECK CONDITION's status code +is 2, not 1. diff --git a/Documentation/vDSO/Makefile b/Documentation/vDSO/Makefile index 2b99e57207c1..ee075c3d2124 100644 --- a/Documentation/vDSO/Makefile +++ b/Documentation/vDSO/Makefile @@ -10,3 +10,6 @@ always := $(hostprogs-y) HOSTCFLAGS := -I$(objtree)/usr/include -std=gnu99 HOSTCFLAGS_vdso_standalone_test_x86.o := -fno-asynchronous-unwind-tables -fno-stack-protector HOSTLOADLIBES_vdso_standalone_test_x86 := -nostdlib +ifeq ($(CONFIG_X86_32),y) +HOSTLOADLIBES_vdso_standalone_test_x86 += -lgcc_s +endif diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c index d46240265c50..93b0ebf8cc38 100644 --- a/Documentation/vDSO/vdso_standalone_test_x86.c +++ b/Documentation/vDSO/vdso_standalone_test_x86.c @@ -63,7 +63,7 @@ static inline void linux_exit(int code) x86_syscall3(__NR_exit, code, 0, 0); } -void to_base10(char *lastdig, uint64_t n) +void to_base10(char *lastdig, time_t n) { while (n) { *lastdig = (n % 10) + '0'; diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index bdd4bb97fff7..b64e0af9cc56 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt @@ -274,7 +274,7 @@ This command mounts a (pseudo) filesystem of type hugetlbfs on the directory /mnt/huge. Any files created on /mnt/huge uses huge pages. The uid and gid options sets the owner and group of the root of the file system. By default the uid and gid of the current process are taken. The mode option sets the -mode of root of file system to value & 0777. This value is given in octal. +mode of root of file system to value & 01777. This value is given in octal. By default the value 0755 is picked. The size option sets the maximum value of memory (huge pages) allowed for that filesystem (/mnt/huge). The size is rounded down to HPAGE_SIZE. The option nr_inodes sets the maximum number of |